35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
114 template<
typename MT1
// Compile-time switch for the SMP assignment kernel overloads.
// `value` is non-zero when `evaluateLeft` or `evaluateRight` is non-zero;
// T1..T3 do not appear in the visible condition.
145 template<
typename T1,
typename T2,
typename T3 >
146 struct UseSMPAssignKernel {
147 enum { value = evaluateLeft || evaluateRight };
// Compile-time switch for the single precision kernel.
// `value` is non-zero only if IsFloat holds for the ElementType of all three
// types T1, T2 and T3.
157 template<
typename T1,
typename T2,
typename T3 >
158 struct UseSinglePrecisionKernel {
159 enum { value = IsFloat<typename T1::ElementType>::value &&
160 IsFloat<typename T2::ElementType>::value &&
161 IsFloat<typename T3::ElementType>::value };
// Compile-time switch for the double precision kernel.
// `value` is non-zero only if IsDouble holds for the ElementType of all three
// types T1, T2 and T3.
171 template<
typename T1,
typename T2,
typename T3 >
172 struct UseDoublePrecisionKernel {
173 enum { value = IsDouble<typename T1::ElementType>::value &&
174 IsDouble<typename T2::ElementType>::value &&
175 IsDouble<typename T3::ElementType>::value };
// Compile-time switch for the single precision complex kernel.
// `value` is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<float> (checked with IsSame, so no conversions are considered).
186 template<
typename T1,
typename T2,
typename T3 >
187 struct UseSinglePrecisionComplexKernel {
188 typedef complex<float> Type;
189 enum { value = IsSame<typename T1::ElementType,Type>::value &&
190 IsSame<typename T2::ElementType,Type>::value &&
191 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the double precision complex kernel.
// `value` is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<double> (checked with IsSame, so no conversions are considered).
202 template<
typename T1,
typename T2,
typename T3 >
203 struct UseDoublePrecisionComplexKernel {
204 typedef complex<double> Type;
205 enum { value = IsSame<typename T1::ElementType,Type>::value &&
206 IsSame<typename T2::ElementType,Type>::value &&
207 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the default (non-BLAS) kernel.
// `value` is non-zero when BLAZE_BLAS_MODE evaluates to false, or when none of
// the four precision-specific kernel switches (single, double, single complex,
// double complex) applies to T1, T2, T3.
217 template<
typename T1,
typename T2,
typename T3 >
218 struct UseDefaultKernel {
219 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel.
// `value` is non-zero only if all three types report `vectorizable`, share the
// same ElementType, and IntrinsicTrait of that element type advertises
// addition, subtraction and multiplication.
232 template<
typename T1,
typename T2,
typename T3 >
233 struct UseVectorizedDefaultKernel {
234 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237 IntrinsicTrait<typename T1::ElementType>::addition &&
238 IntrinsicTrait<typename T1::ElementType>::subtraction &&
239 IntrinsicTrait<typename T1::ElementType>::multiplication };
270 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
276 enum { smpAssignable = !evaluateLeft && !evaluateRight };
306 if(
lhs_.columns() != 0UL ) {
307 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
309 for(
size_t k=1UL; k<end; k+=2UL ) {
311 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
313 if( end <
lhs_.columns() ) {
341 return rhs_.columns();
371 template<
typename T >
373 return (
lhs_.canAlias( alias ) ||
rhs_.canAlias( alias ) );
383 template<
typename T >
385 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
395 return lhs_.isAligned() &&
rhs_.isAligned();
427 template<
typename MT3
436 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
439 else if( rhs.
lhs_.columns() == 0UL ) {
454 DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Kernel selection for the assignment: forwards C, A and B either to
// selectDefaultAssignKernel or to selectBlasAssignKernel.
// NOTE(review): the return type and the condition choosing between the two
// calls are not part of this excerpt.
470 template<
typename MT3
474 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
477 DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
479 DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Overload of selectAssignKernel that participates in overload resolution only
// when UseSMPAssignKernel<MT3,MT4,MT5> holds (EnableIf).
// NOTE(review): the body is not part of this excerpt.
495 template<
typename MT3
498 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
499 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Default (non-vectorized) assignment kernel; removed from overload resolution
// when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds (DisableIf).
// C receives the product of A and B, computed row by row.
519 template<
typename MT3
522 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
523 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// M x K times K x N: M rows of A, N columns of B, K is the shared dimension.
525 const size_t M( A.rows() );
526 const size_t N( B.columns() );
527 const size_t K( A.columns() );
529 for(
size_t i=0UL; i<M; ++i ) {
530 for(
size_t j=0UL; j<N; ++j ) {
// Seed row i with the k == 0 term by plain assignment, so previous contents of
// C are overwritten rather than accumulated into.
// NOTE(review): reads A(i,0) and B(0,j) unconditionally - presumably the caller
// rules out K == 0; confirm.
531 C(i,j) = A(i,0UL) * B(0UL,j);
533 for(
size_t k=1UL; k<K; ++k ) {
534 for(
size_t j=0UL; j<N; ++j ) {
// Remaining terms k >= 1 are accumulated on top of the seeded value.
535 C(i,j) += A(i,k) * B(k,j);
// Vectorized default assignment kernel for targets of type DenseMatrix<MT3,false>;
// enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The columns of B/C are walked in panels of decreasing width (8, 4, 2 times
// IT::size, then narrower), accumulating a1 * B.load(...) products in
// IntrinsicType registers and writing them to C with store().
// NOTE(review): several declarations (j, i, a1, a2, b1..b4) and closing braces
// are not part of this excerpt.
557 template<
typename MT3
560 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
561 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
563 typedef IntrinsicTrait<ElementType> IT;
565 const size_t M( A.rows() );
566 const size_t N( B.columns() );
567 const size_t K( A.columns() );
// Panels of 8*IT::size columns, one row of C at a time, eight accumulators.
571 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
572 for(
size_t i=0UL; i<M; ++i ) {
573 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
574 for(
size_t k=0UL; k<K; ++k ) {
576 xmm1 = xmm1 + a1 * B.load(k,j );
577 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
578 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
579 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
580 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
581 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
582 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
583 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
585 (~C).
store( i, j , xmm1 );
586 (~C).
store( i, j+IT::size , xmm2 );
587 (~C).
store( i, j+IT::size*2UL, xmm3 );
588 (~C).
store( i, j+IT::size*3UL, xmm4 );
589 (~C).
store( i, j+IT::size*4UL, xmm5 );
590 (~C).
store( i, j+IT::size*5UL, xmm6 );
591 (~C).
store( i, j+IT::size*6UL, xmm7 );
592 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Panels of 4*IT::size columns; rows are processed in pairs (i, i+1) first.
595 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
597 for( ; (i+2UL) <= M; i+=2UL ) {
598 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
599 for(
size_t k=0UL; k<K; ++k ) {
606 xmm1 = xmm1 + a1 * b1;
607 xmm2 = xmm2 + a1 * b2;
608 xmm3 = xmm3 + a1 * b3;
609 xmm4 = xmm4 + a1 * b4;
610 xmm5 = xmm5 + a2 * b1;
611 xmm6 = xmm6 + a2 * b2;
612 xmm7 = xmm7 + a2 * b3;
613 xmm8 = xmm8 + a2 * b4;
615 (~C).
store( i , j , xmm1 );
616 (~C).
store( i , j+IT::size , xmm2 );
617 (~C).
store( i , j+IT::size*2UL, xmm3 );
618 (~C).
store( i , j+IT::size*3UL, xmm4 );
619 (~C).
store( i+1UL, j , xmm5 );
620 (~C).
store( i+1UL, j+IT::size , xmm6 );
621 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
622 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant for the same 4*IT::size panel.
626 for(
size_t k=0UL; k<K; ++k ) {
628 xmm1 = xmm1 + a1 * B.load(k,j );
629 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
630 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
631 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
633 (~C).
store( i, j , xmm1 );
634 (~C).
store( i, j+IT::size , xmm2 );
635 (~C).
store( i, j+IT::size*2UL, xmm3 );
636 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Panels of 2*IT::size columns; rows again in pairs, then a single-row variant.
639 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
641 for( ; (i+2UL) <= M; i+=2UL ) {
643 for(
size_t k=0UL; k<K; ++k ) {
648 xmm1 = xmm1 + a1 * b1;
649 xmm2 = xmm2 + a1 * b2;
650 xmm3 = xmm3 + a2 * b1;
651 xmm4 = xmm4 + a2 * b2;
653 (~C).
store( i , j , xmm1 );
654 (~C).
store( i , j+IT::size, xmm2 );
655 (~C).
store( i+1UL, j , xmm3 );
656 (~C).
store( i+1UL, j+IT::size, xmm4 );
660 for(
size_t k=0UL; k<K; ++k ) {
662 xmm1 = xmm1 + a1 * B.load(k,j );
663 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
665 (~C).
store( i, j , xmm1 );
666 (~C).
store( i, j+IT::size, xmm2 );
// One vector column (b1) at a time; rows in pairs, then a single-row variant.
671 for( ; (i+2UL) <= M; i+=2UL ) {
673 for(
size_t k=0UL; k<K; ++k ) {
675 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
676 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
678 (~C).
store( i , j, xmm1 );
679 (~C).
store( i+1UL, j, xmm2 );
683 for(
size_t k=0UL; k<K; ++k ) {
684 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
686 (~C).
store( i, j, xmm1 );
// Vectorized default assignment kernel overload for targets of type
// DenseMatrix<MT3,true> (presumably the column-major case - confirm).
// The visible branches decide based on whether MT4 / MT5 are resizable and,
// failing that, on comparing the element counts of A and B.
// NOTE(review): the branch bodies are not part of this excerpt.
707 template<
typename MT3
710 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
711 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
716 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
720 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
724 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-stage fallback for assignment: when UseDefaultKernel<MT3,MT4,MT5> holds,
// no BLAS routine is used and the call is forwarded to selectDefaultAssignKernel.
749 template<
typename MT3
752 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
753 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
755 selectDefaultAssignKernel( C, A, B );
// BLAS assignment kernel for single precision operands; enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = 1.0F and beta = 0.0F, i.e. C is overwritten with the product.
775 template<
typename MT3
778 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
779 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
781 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
787 const int M ( numeric_cast<int>( A.rows() ) );
788 const int N ( numeric_cast<int>( B.columns() ) );
789 const int K ( numeric_cast<int>( A.columns() ) );
790 const int lda( numeric_cast<int>( A.spacing() ) );
791 const int ldb( numeric_cast<int>( B.spacing() ) );
792 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
794 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
795 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
796 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
797 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands; enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = 1.0 and beta = 0.0, i.e. C is overwritten with the product.
818 template<
typename MT3
821 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
822 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
824 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
830 const int M ( numeric_cast<int>( A.rows() ) );
831 const int N ( numeric_cast<int>( B.columns() ) );
832 const int K ( numeric_cast<int>( A.columns() ) );
833 const int lda( numeric_cast<int>( A.spacing() ) );
834 const int ldb( numeric_cast<int>( B.spacing() ) );
835 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
837 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
838 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
839 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
840 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with
// alpha = (1,0) and beta = (0,0), i.e. C is overwritten with the product.
861 template<
typename MT3
864 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
865 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
867 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
876 const int M ( numeric_cast<int>( A.rows() ) );
877 const int N ( numeric_cast<int>( B.columns() ) );
878 const int K ( numeric_cast<int>( A.columns() ) );
879 const int lda( numeric_cast<int>( A.spacing() ) );
880 const int ldb( numeric_cast<int>( B.spacing() ) );
881 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
882 const complex<float> alpha( 1.0F, 0.0F );
883 const complex<float> beta ( 0.0F, 0.0F );
885 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
886 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
887 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
888 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for complex<double> operands; enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with
// alpha = (1,0) and beta = (0,0), i.e. C is overwritten with the product.
909 template<
typename MT3
912 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
913 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
915 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
924 const int M ( numeric_cast<int>( A.rows() ) );
925 const int N ( numeric_cast<int>( B.columns() ) );
926 const int K ( numeric_cast<int>( A.columns() ) );
927 const int lda( numeric_cast<int>( A.spacing() ) );
928 const int ldb( numeric_cast<int>( B.spacing() ) );
929 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
930 const complex<double> alpha( 1.0, 0.0 );
931 const complex<double> beta ( 0.0, 0.0 );
933 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
934 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
935 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
936 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
954 template<
typename MT
960 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
972 const TmpType tmp( rhs );
991 template<
typename MT3
1000 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1014 DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Kernel selection for addition assignment; removed from overload resolution
// when UseSMPAssignKernel<MT3,MT4,MT5> holds (DisableIf). Forwards C, A and B
// either to selectDefaultAddAssignKernel or to selectBlasAddAssignKernel.
// NOTE(review): the condition choosing between the two calls is not part of
// this excerpt.
1030 template<
typename MT3
1033 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1034 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1037 DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1039 DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Overload of selectAddAssignKernel that participates in overload resolution
// only when UseSMPAssignKernel<MT3,MT4,MT5> holds (EnableIf).
// NOTE(review): the body is not part of this excerpt.
1055 template<
typename MT3
1058 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1059 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Default (non-vectorized) addition assignment kernel; removed from overload
// resolution when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds (DisableIf).
// Every product term A(i,k) * B(k,j) is added onto the existing C(i,j).
1080 template<
typename MT3
1083 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1086 const size_t M( A.rows() );
1087 const size_t N( B.columns() );
1088 const size_t K( A.columns() );
// `end` is N with its lowest bit cleared, i.e. the largest even value <= N, so
// the inner loop can handle two columns per iteration.
1091 const size_t end( N &
size_t(-2) );
1093 for(
size_t i=0UL; i<M; ++i ) {
1094 for(
size_t k=0UL; k<K; ++k ) {
1095 for(
size_t j=0UL; j<end; j+=2UL ) {
1096 C(i,j ) += A(i,k) * B(k,j );
1097 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update of column `end`, the one left over when N is odd.
// NOTE(review): the guard around this statement is not part of this excerpt.
1100 C(i,end) += A(i,k) * B(k,end);
// Vectorized default addition assignment kernel for targets of type
// DenseMatrix<MT3,false>; enabled only when UseVectorizedDefaultKernel holds.
// The columns of B/C are walked in panels of decreasing width (8, 4, 2 times
// IT::size, then narrower); a1 * B.load(...) products are added to the xmm
// accumulators, which are then written to C with store().
// NOTE(review): the declarations that seed the accumulators (and j, i, a1, a2,
// b1..b4) are not part of this excerpt.
1122 template<
typename MT3
1125 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1126 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1128 typedef IntrinsicTrait<ElementType> IT;
1130 const size_t M( A.rows() );
1131 const size_t N( B.columns() );
1132 const size_t K( A.columns() );
// Panels of 8*IT::size columns, one row of C at a time.
1136 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1137 for(
size_t i=0UL; i<M; ++i ) {
1146 for(
size_t k=0UL; k<K; ++k ) {
1148 xmm1 = xmm1 + a1 * B.load(k,j );
1149 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1150 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1151 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1152 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1153 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1154 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1155 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1157 (~C).
store( i, j , xmm1 );
1158 (~C).
store( i, j+IT::size , xmm2 );
1159 (~C).
store( i, j+IT::size*2UL, xmm3 );
1160 (~C).
store( i, j+IT::size*3UL, xmm4 );
1161 (~C).
store( i, j+IT::size*4UL, xmm5 );
1162 (~C).
store( i, j+IT::size*5UL, xmm6 );
1163 (~C).
store( i, j+IT::size*6UL, xmm7 );
1164 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Panels of 4*IT::size columns; rows are processed in pairs (i, i+1) first.
1167 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1169 for( ; (i+2UL) <= M; i+=2UL ) {
1178 for(
size_t k=0UL; k<K; ++k ) {
1185 xmm1 = xmm1 + a1 * b1;
1186 xmm2 = xmm2 + a1 * b2;
1187 xmm3 = xmm3 + a1 * b3;
1188 xmm4 = xmm4 + a1 * b4;
1189 xmm5 = xmm5 + a2 * b1;
1190 xmm6 = xmm6 + a2 * b2;
1191 xmm7 = xmm7 + a2 * b3;
1192 xmm8 = xmm8 + a2 * b4;
1194 (~C).
store( i , j , xmm1 );
1195 (~C).
store( i , j+IT::size , xmm2 );
1196 (~C).
store( i , j+IT::size*2UL, xmm3 );
1197 (~C).
store( i , j+IT::size*3UL, xmm4 );
1198 (~C).
store( i+1UL, j , xmm5 );
1199 (~C).
store( i+1UL, j+IT::size , xmm6 );
1200 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
1201 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant for the same 4*IT::size panel.
1208 for(
size_t k=0UL; k<K; ++k ) {
1210 xmm1 = xmm1 + a1 * B.load(k,j );
1211 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1212 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1213 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1215 (~C).
store( i, j , xmm1 );
1216 (~C).
store( i, j+IT::size , xmm2 );
1217 (~C).
store( i, j+IT::size*2UL, xmm3 );
1218 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Panels of 2*IT::size columns; rows again in pairs, then a single-row variant.
1221 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1223 for( ; (i+2UL) <= M; i+=2UL ) {
1228 for(
size_t k=0UL; k<K; ++k ) {
1233 xmm1 = xmm1 + a1 * b1;
1234 xmm2 = xmm2 + a1 * b2;
1235 xmm3 = xmm3 + a2 * b1;
1236 xmm4 = xmm4 + a2 * b2;
1238 (~C).
store( i , j , xmm1 );
1239 (~C).
store( i , j+IT::size, xmm2 );
1240 (~C).
store( i+1UL, j , xmm3 );
1241 (~C).
store( i+1UL, j+IT::size, xmm4 );
1246 for(
size_t k=0UL; k<K; ++k ) {
1248 xmm1 = xmm1 + a1 * B.load(k,j );
1249 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1251 (~C).
store( i, j , xmm1 );
1252 (~C).
store( i, j+IT::size, xmm2 );
// One vector column (b1) at a time; rows in pairs, then a single-row variant.
1257 for( ; (i+2UL) <= M; i+=2UL ) {
1260 for(
size_t k=0UL; k<K; ++k ) {
1262 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
1263 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
1265 (~C).
store( i , j, xmm1 );
1266 (~C).
store( i+1UL, j, xmm2 );
1270 for(
size_t k=0UL; k<K; ++k ) {
1271 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
1273 (~C).
store( i, j, xmm1 );
// Vectorized default addition assignment kernel overload for targets of type
// DenseMatrix<MT3,true> (presumably the column-major case - confirm).
// The visible branches decide based on whether MT4 / MT5 are resizable and,
// failing that, on comparing the element counts of A and B.
// NOTE(review): the branch bodies are not part of this excerpt.
1294 template<
typename MT3
1297 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1298 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1303 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1307 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1311 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-stage fallback for addition assignment: when UseDefaultKernel holds, no
// BLAS routine is used and the call is forwarded to selectDefaultAddAssignKernel.
1337 template<
typename MT3
1340 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1341 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1343 selectDefaultAddAssignKernel( C, A, B );
// BLAS addition assignment kernel for single precision operands; enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = 1.0F and beta = 1.0F, i.e. the product is added to the existing C.
1363 template<
typename MT3
1366 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1367 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1369 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1375 const int M ( numeric_cast<int>( A.rows() ) );
1376 const int N ( numeric_cast<int>( B.columns() ) );
1377 const int K ( numeric_cast<int>( A.columns() ) );
1378 const int lda( numeric_cast<int>( A.spacing() ) );
1379 const int ldb( numeric_cast<int>( B.spacing() ) );
1380 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
1382 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1383 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1384 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1385 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands; enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = 1.0 and beta = 1.0, i.e. the product is added to the existing C.
1406 template<
typename MT3
1409 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1410 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1412 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1418 const int M ( numeric_cast<int>( A.rows() ) );
1419 const int N ( numeric_cast<int>( B.columns() ) );
1420 const int K ( numeric_cast<int>( A.columns() ) );
1421 const int lda( numeric_cast<int>( A.spacing() ) );
1422 const int ldb( numeric_cast<int>( B.spacing() ) );
1423 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
1425 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1426 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1427 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1428 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with
// alpha = (1,0) and beta = (1,0), i.e. the product is added to the existing C.
1449 template<
typename MT3
1452 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1453 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1455 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1464 const int M ( numeric_cast<int>( A.rows() ) );
1465 const int N ( numeric_cast<int>( B.columns() ) );
1466 const int K ( numeric_cast<int>( A.columns() ) );
1467 const int lda( numeric_cast<int>( A.spacing() ) );
1468 const int ldb( numeric_cast<int>( B.spacing() ) );
1469 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
1470 const complex<float> alpha( 1.0F, 0.0F );
1471 const complex<float> beta ( 1.0F, 0.0F );
1473 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1474 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1475 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1476 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for complex<double> operands; enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with
// alpha = (1,0) and beta = (1,0), i.e. the product is added to the existing C.
1497 template<
typename MT3
1500 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1501 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1503 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1512 const int M ( numeric_cast<int>( A.rows() ) );
1513 const int N ( numeric_cast<int>( B.columns() ) );
1514 const int K ( numeric_cast<int>( A.columns() ) );
1515 const int lda( numeric_cast<int>( A.spacing() ) );
1516 const int ldb( numeric_cast<int>( B.spacing() ) );
1517 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
1518 const complex<double> alpha( 1.0, 0.0 );
1519 const complex<double> beta ( 1.0, 0.0 );
1521 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1522 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1523 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1524 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1547 template<
typename MT3
1556 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1570 DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Kernel selection for subtraction assignment; removed from overload
// resolution when UseSMPAssignKernel<MT3,MT4,MT5> holds (DisableIf). Forwards
// C, A and B either to selectDefaultSubAssignKernel or to
// selectBlasSubAssignKernel.
// NOTE(review): the condition choosing between the two calls is not part of
// this excerpt.
1586 template<
typename MT3
1589 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1590 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1593 DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1595 DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Overload of selectSubAssignKernel that participates in overload resolution
// only when UseSMPAssignKernel<MT3,MT4,MT5> holds (EnableIf).
// NOTE(review): the body is not part of this excerpt.
1611 template<
typename MT3
1614 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1615 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Default (non-vectorized) subtraction assignment kernel; removed from
// overload resolution when UseVectorizedDefaultKernel holds (DisableIf).
// Every product term A(i,k) * B(k,j) is subtracted from the existing C(i,j).
1636 template<
typename MT3
1639 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1640 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1642 const size_t M( A.rows() );
1643 const size_t N( B.columns() );
1644 const size_t K( A.columns() );
// `end` is N with its lowest bit cleared, i.e. the largest even value <= N, so
// the inner loop can handle two columns per iteration.
1647 const size_t end( N &
size_t(-2) );
1649 for(
size_t i=0UL; i<M; ++i ) {
1650 for(
size_t k=0UL; k<K; ++k ) {
1651 for(
size_t j=0UL; j<end; j+=2UL ) {
1652 C(i,j ) -= A(i,k) * B(k,j );
1653 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update of column `end`, the one left over when N is odd.
// NOTE(review): the guard around this statement is not part of this excerpt.
1656 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default subtraction assignment kernel for targets of type
// DenseMatrix<MT3,false>; enabled only when UseVectorizedDefaultKernel holds.
// The columns of B/C are walked in panels of decreasing width (8, 4, 2 times
// IT::size, then narrower); a1 * B.load(...) products are subtracted from the
// xmm accumulators, which are then written to C with store().
// NOTE(review): the declarations that seed the accumulators (and j, i, a1, a2,
// b1..b4) are not part of this excerpt.
1678 template<
typename MT3
1681 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1682 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1684 typedef IntrinsicTrait<ElementType> IT;
1686 const size_t M( A.rows() );
1687 const size_t N( B.columns() );
1688 const size_t K( A.columns() );
// Panels of 8*IT::size columns, one row of C at a time.
1692 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1693 for(
size_t i=0UL; i<M; ++i ) {
1702 for(
size_t k=0UL; k<K; ++k ) {
1704 xmm1 = xmm1 - a1 * B.load(k,j );
1705 xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1706 xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1707 xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1708 xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1709 xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1710 xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1711 xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1713 (~C).
store( i, j , xmm1 );
1714 (~C).
store( i, j+IT::size , xmm2 );
1715 (~C).
store( i, j+IT::size*2UL, xmm3 );
1716 (~C).
store( i, j+IT::size*3UL, xmm4 );
1717 (~C).
store( i, j+IT::size*4UL, xmm5 );
1718 (~C).
store( i, j+IT::size*5UL, xmm6 );
1719 (~C).
store( i, j+IT::size*6UL, xmm7 );
1720 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Panels of 4*IT::size columns; rows are processed in pairs (i, i+1) first.
1723 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1725 for( ; (i+2UL) <= M; i+=2UL ) {
1734 for(
size_t k=0UL; k<K; ++k ) {
1741 xmm1 = xmm1 - a1 * b1;
1742 xmm2 = xmm2 - a1 * b2;
1743 xmm3 = xmm3 - a1 * b3;
1744 xmm4 = xmm4 - a1 * b4;
1745 xmm5 = xmm5 - a2 * b1;
1746 xmm6 = xmm6 - a2 * b2;
1747 xmm7 = xmm7 - a2 * b3;
1748 xmm8 = xmm8 - a2 * b4;
1750 (~C).
store( i , j , xmm1 );
1751 (~C).
store( i , j+IT::size , xmm2 );
1752 (~C).
store( i , j+IT::size*2UL, xmm3 );
1753 (~C).
store( i , j+IT::size*3UL, xmm4 );
1754 (~C).
store( i+1UL, j , xmm5 );
1755 (~C).
store( i+1UL, j+IT::size , xmm6 );
1756 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
1757 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant for the same 4*IT::size panel.
1764 for(
size_t k=0UL; k<K; ++k ) {
1766 xmm1 = xmm1 - a1 * B.load(k,j );
1767 xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1768 xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1769 xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1771 (~C).
store( i, j , xmm1 );
1772 (~C).
store( i, j+IT::size , xmm2 );
1773 (~C).
store( i, j+IT::size*2UL, xmm3 );
1774 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Panels of 2*IT::size columns; rows again in pairs, then a single-row variant.
1777 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1779 for( ; (i+2UL) <= M; i+=2UL ) {
1784 for(
size_t k=0UL; k<K; ++k ) {
1789 xmm1 = xmm1 - a1 * b1;
1790 xmm2 = xmm2 - a1 * b2;
1791 xmm3 = xmm3 - a2 * b1;
1792 xmm4 = xmm4 - a2 * b2;
1794 (~C).
store( i , j , xmm1 );
1795 (~C).
store( i , j+IT::size, xmm2 );
1796 (~C).
store( i+1UL, j , xmm3 );
1797 (~C).
store( i+1UL, j+IT::size, xmm4 );
1802 for(
size_t k=0UL; k<K; ++k ) {
1804 xmm1 = xmm1 - a1 * B.load(k,j );
1805 xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1807 (~C).
store( i, j , xmm1 );
1808 (~C).
store( i, j+IT::size, xmm2 );
// One vector column (b1) at a time; rows in pairs, then a single-row variant.
1813 for( ; (i+2UL) <= M; i+=2UL ) {
1816 for(
size_t k=0UL; k<K; ++k ) {
1818 xmm1 = xmm1 -
set( A(i ,k) ) * b1;
1819 xmm2 = xmm2 -
set( A(i+1UL,k) ) * b1;
1821 (~C).
store( i , j, xmm1 );
1822 (~C).
store( i+1UL, j, xmm2 );
1826 for(
size_t k=0UL; k<K; ++k ) {
1827 xmm1 = xmm1 -
set( A(i,k) ) * B.load(k,j);
1829 (~C).
store( i, j, xmm1 );
// Vectorized default subtraction assignment kernel overload for targets of
// type DenseMatrix<MT3,true> (presumably the column-major case - confirm).
// The visible branches decide based on whether MT4 / MT5 are resizable and,
// failing that, on comparing the element counts of A and B.
// NOTE(review): the branch bodies are not part of this excerpt.
1850 template<
typename MT3
1853 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1854 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1859 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1863 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1867 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-stage fallback for subtraction assignment: when UseDefaultKernel holds,
// no BLAS routine is used and the call is forwarded to
// selectDefaultSubAssignKernel.
1893 template<
typename MT3
1896 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1897 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1899 selectDefaultSubAssignKernel( C, A, B );
// BLAS subtraction assignment kernel for single precision operands; enabled
// when UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = -1.0F and beta = 1.0F, i.e. the product is subtracted from C.
1919 template<
typename MT3
1922 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1923 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1925 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1931 const int M ( numeric_cast<int>( A.rows() ) );
1932 const int N ( numeric_cast<int>( B.columns() ) );
1933 const int K ( numeric_cast<int>( A.columns() ) );
1934 const int lda( numeric_cast<int>( A.spacing() ) );
1935 const int ldb( numeric_cast<int>( B.spacing() ) );
1936 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
1938 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1939 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1940 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1941 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands; enabled
// when UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = -1.0 and beta = 1.0, i.e. the product is subtracted from C.
1962 template<
typename MT3
1965 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1966 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1968 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
1974 const int M ( numeric_cast<int>( A.rows() ) );
1975 const int N ( numeric_cast<int>( B.columns() ) );
1976 const int K ( numeric_cast<int>( A.columns() ) );
1977 const int lda( numeric_cast<int>( A.spacing() ) );
1978 const int ldb( numeric_cast<int>( B.spacing() ) );
1979 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order and both transpose flags are all derived from the storage
// order of the target type MT3.
1981 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1982 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1983 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1984 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with
// alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from C.
2005 template<
typename MT3
2008 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2009 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2011 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
2020 const int M ( numeric_cast<int>( A.rows() ) );
2021 const int N ( numeric_cast<int>( B.columns() ) );
2022 const int K ( numeric_cast<int>( A.columns() ) );
2023 const int lda( numeric_cast<int>( A.spacing() ) );
2024 const int ldb( numeric_cast<int>( B.spacing() ) );
2025 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
2026 const complex<float> alpha( -1.0F, 0.0F );
2027 const complex<float> beta ( 1.0F, 0.0F );
2029 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2030 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2031 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2032 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<double> operands; enabled
// when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm
// with alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from C.
2053 template<
typename MT3
2056 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2057 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2059 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast range-checks the size_t -> int
// conversion. Leading dimensions come from spacing().
2068 const int M ( numeric_cast<int>( A.rows() ) );
2069 const int N ( numeric_cast<int>( B.columns() ) );
2070 const int K ( numeric_cast<int>( A.columns() ) );
2071 const int lda( numeric_cast<int>( A.spacing() ) );
2072 const int ldb( numeric_cast<int>( B.spacing() ) );
2073 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take the scaling factors by address.
2074 const complex<double> alpha( -1.0, 0.0 );
2075 const complex<double> beta ( 1.0, 0.0 );
2077 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2078 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2079 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2080 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2126 template<
typename MT1
2130 :
public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2131 ,
private MatScalarMultExpr
2132 ,
private Computation
2136 typedef DMatDMatMultExpr<MT1,MT2> MMM;
2148 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2153 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time switch for the SMP assignment kernel overloads of the scaled
// product. `value` is non-zero when `evaluateLeft` or `evaluateRight` is
// non-zero; T1..T4 do not appear in the visible condition.
2160 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2161 struct UseSMPAssignKernel {
2162 enum { value = evaluateLeft || evaluateRight };
// Compile-time switch for the single precision kernel of the scaled product.
// `value` is non-zero only if IsFloat holds for the ElementType of T1, T2 and
// T3 and the fourth type T4 is not complex (IsComplex<T4> is false).
2171 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2172 struct UseSinglePrecisionKernel {
2173 enum { value = IsFloat<typename T1::ElementType>::value &&
2174 IsFloat<typename T2::ElementType>::value &&
2175 IsFloat<typename T3::ElementType>::value &&
2176 !IsComplex<T4>::value };
// Compile-time switch for the double precision kernel of the scaled product.
// `value` is non-zero only if IsDouble holds for the ElementType of T1, T2 and
// T3 and the fourth type T4 is not complex (IsComplex<T4> is false).
2185 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2186 struct UseDoublePrecisionKernel {
2187 enum { value = IsDouble<typename T1::ElementType>::value &&
2188 IsDouble<typename T2::ElementType>::value &&
2189 IsDouble<typename T3::ElementType>::value &&
2190 !IsComplex<T4>::value };
// Compile-time switch for the single precision complex kernel of the scaled
// product. `value` is non-zero only if the ElementType of T1, T2 and T3 is
// exactly complex<float>. Unlike the neighbouring real-valued switches, this
// one takes three template parameters (no scalar type).
2199 template<
typename T1,
typename T2,
typename T3 >
2200 struct UseSinglePrecisionComplexKernel {
2201 typedef complex<float> Type;
2202 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2203 IsSame<typename T2::ElementType,Type>::value &&
2204 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the double precision complex kernel of the scaled
// product. `value` is non-zero only if the ElementType of T1, T2 and T3 is
// exactly complex<double>. Unlike the neighbouring real-valued switches, this
// one takes three template parameters (no scalar type).
2213 template<
typename T1,
typename T2,
typename T3 >
2214 struct UseDoublePrecisionComplexKernel {
2215 typedef complex<double> Type;
2216 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2217 IsSame<typename T2::ElementType,Type>::value &&
2218 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the default (non-BLAS) kernel of the scaled product.
// `value` is non-zero when BLAZE_BLAS_MODE evaluates to false, or when none of
// the four precision-specific kernel switches applies. The real-valued
// switches are queried with T4, the complex ones with T1..T3 only.
2226 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2227 struct UseDefaultKernel {
2228 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2229 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2230 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2231 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel of the scaled product.
// `value` is non-zero only if T1..T3 report `vectorizable`, share the same
// ElementType, that element type is also exactly T4, and IntrinsicTrait of the
// element type advertises addition, subtraction and multiplication.
2239 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2240 struct UseVectorizedDefaultKernel {
2241 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2242 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2243 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2244 IsSame<typename T1::ElementType,T4>::value &&
2245 IntrinsicTrait<typename T1::ElementType>::addition &&
2246 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2247 IntrinsicTrait<typename T1::ElementType>::multiplication };
2253 typedef DMatScalarMultExpr<MMM,ST,false>
This;
2254 typedef typename MultTrait<RES,ST>::Type
ResultType;
2258 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2263 typedef const DMatDMatMultExpr<MT1,MT2>
LeftOperand;
2269 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2272 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
2277 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2278 IsSame<ET1,ET2>::value &&
2279 IsSame<ET1,ST>::value &&
2280 IntrinsicTrait<ET1>::addition &&
2281 IntrinsicTrait<ET1>::multiplication };
2284 enum { smpAssignable = !evaluateLeft && !evaluateRight };
2293 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
2309 return matrix_(i,j) * scalar_;
/*!\brief Returns the number of rows of the expression.
//
// \return The value of \c matrix_.rows(); the query is forwarded to the member \c matrix_.
*/
2318 inline size_t rows()
const {
2319 return matrix_.rows();
/*!\brief Returns the number of columns of the expression.
//
// \return The value of \c matrix_.columns(); the query is forwarded to the member \c matrix_.
*/
2328 inline size_t columns()
const {
2329 return matrix_.columns();
/*!\brief Returns whether the expression can alias with the given address.
//
// \param alias The address to be checked.
// \return The result of \c matrix_.canAlias( alias ); this function adds no check of its own.
*/
2359 template<
typename T >
2360 inline bool canAlias(
const T* alias )
const {
2361 return matrix_.canAlias( alias );
/*!\brief Returns whether the expression is aliased with the given address.
//
// \param alias The address to be checked.
// \return The result of \c matrix_.isAliased( alias ); this function adds no check of its own.
*/
2371 template<
typename T >
2372 inline bool isAliased(
const T* alias )
const {
2373 return matrix_.isAliased( alias );
2383 return matrix_.isAligned();
2393 typename MMM::LeftOperand A( matrix_.leftOperand() );
2415 template<
typename MT3
2417 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2424 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2425 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2427 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2430 else if( left.columns() == 0UL ) {
2445 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2460 template<
typename MT3
2464 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2465 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2468 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2470 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2485 template<
typename MT3
2489 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2490 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2510 template<
typename MT3
2514 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2515 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2517 const size_t M( A.rows() );
2518 const size_t N( B.columns() );
2519 const size_t K( A.columns() );
2521 for(
size_t i=0UL; i<M; ++i ) {
2522 for(
size_t j=0UL; j<N; ++j ) {
2523 C(i,j) = A(i,0UL) * B(0UL,j);
2525 for(
size_t k=1UL; k<K; ++k ) {
2526 for(
size_t j=0UL; j<N; ++j ) {
2527 C(i,j) += A(i,k) * B(k,j);
2530 for(
size_t j=0UL; j<N; ++j ) {
2551 template<
typename MT3
2555 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2556 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2558 typedef IntrinsicTrait<ElementType> IT;
2560 const size_t M( A.rows() );
2561 const size_t N( B.columns() );
2562 const size_t K( A.columns() );
2568 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2569 for(
size_t i=0UL; i<M; ++i ) {
2570 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2571 for(
size_t k=0UL; k<K; ++k ) {
2573 xmm1 = xmm1 + a1 * B.load(k,j );
2574 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2575 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2576 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2577 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2578 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2579 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2580 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2582 (~C).
store( i, j , xmm1 * factor );
2583 (~C).
store( i, j+IT::size , xmm2 * factor );
2584 (~C).
store( i, j+IT::size*2UL, xmm3 * factor );
2585 (~C).
store( i, j+IT::size*3UL, xmm4 * factor );
2586 (~C).
store( i, j+IT::size*4UL, xmm5 * factor );
2587 (~C).
store( i, j+IT::size*5UL, xmm6 * factor );
2588 (~C).
store( i, j+IT::size*6UL, xmm7 * factor );
2589 (~C).
store( i, j+IT::size*7UL, xmm8 * factor );
2592 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2594 for( ; (i+2UL) <= M; i+=2UL ) {
2595 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2596 for(
size_t k=0UL; k<K; ++k ) {
2603 xmm1 = xmm1 + a1 * b1;
2604 xmm2 = xmm2 + a1 * b2;
2605 xmm3 = xmm3 + a1 * b3;
2606 xmm4 = xmm4 + a1 * b4;
2607 xmm5 = xmm5 + a2 * b1;
2608 xmm6 = xmm6 + a2 * b2;
2609 xmm7 = xmm7 + a2 * b3;
2610 xmm8 = xmm8 + a2 * b4;
2612 (~C).
store( i , j , xmm1 * factor );
2613 (~C).
store( i , j+IT::size , xmm2 * factor );
2614 (~C).
store( i , j+IT::size*2UL, xmm3 * factor );
2615 (~C).
store( i , j+IT::size*3UL, xmm4 * factor );
2616 (~C).
store( i+1UL, j , xmm5 * factor );
2617 (~C).
store( i+1UL, j+IT::size , xmm6 * factor );
2618 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 * factor );
2619 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 * factor );
2623 for(
size_t k=0UL; k<K; ++k ) {
2625 xmm1 = xmm1 + a1 * B.load(k,j );
2626 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2627 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2628 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2630 (~C).
store( i, j , xmm1 * factor );
2631 (~C).
store( i, j+IT::size , xmm2 * factor );
2632 (~C).
store( i, j+IT::size*2UL, xmm3 * factor );
2633 (~C).
store( i, j+IT::size*3UL, xmm4 * factor );
2636 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2638 for( ; (i+2UL) <= M; i+=2UL ) {
2640 for(
size_t k=0UL; k<K; ++k ) {
2645 xmm1 = xmm1 + a1 * b1;
2646 xmm2 = xmm2 + a1 * b2;
2647 xmm3 = xmm3 + a2 * b1;
2648 xmm4 = xmm4 + a2 * b2;
2650 (~C).
store( i , j , xmm1 * factor );
2651 (~C).
store( i , j+IT::size, xmm2 * factor );
2652 (~C).
store( i+1UL, j , xmm3 * factor );
2653 (~C).
store( i+1UL, j+IT::size, xmm4 * factor );
2657 for(
size_t k=0UL; k<K; ++k ) {
2659 xmm1 = xmm1 + a1 * B.load(k,j );
2660 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2662 (~C).
store( i, j , xmm1 * factor );
2663 (~C).
store( i, j+IT::size, xmm2 * factor );
2668 for( ; (i+2UL) <= M; i+=2UL ) {
2670 for(
size_t k=0UL; k<K; ++k ) {
2672 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
2673 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
2675 (~C).
store( i , j, xmm1 * factor );
2676 (~C).
store( i+1UL, j, xmm2 * factor );
2680 for(
size_t k=0UL; k<K; ++k ) {
2681 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
2683 (~C).
store( i, j, xmm1 * factor );
2703 template<
typename MT3
2707 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2708 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2713 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2717 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2721 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
/*!\brief Fallback overload of the BLAS assignment kernel.
//
// \param C The target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor.
//
// Participates in overload resolution when \c UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
// It issues no BLAS call; all arguments are forwarded unchanged to
// \c selectDefaultAssignKernel().
*/
2746 template<
typename MT3
2750 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2751 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2753 selectDefaultAssignKernel( C, A, B, scalar );
/*!\brief BLAS assignment kernel based on \c cblas_sgemm.
//
// \param C The target matrix; its contents are overwritten.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor, passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-float operands - TODO confirm).
*/
2772 template<
typename MT3
2776 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2777 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2779 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
2785 const int M ( numeric_cast<int>( A.rows() ) );
2786 const int N ( numeric_cast<int>( B.columns() ) );
2787 const int K ( numeric_cast<int>( A.columns() ) );
2788 const int lda( numeric_cast<int>( A.spacing() ) );
2789 const int ldb( numeric_cast<int>( B.spacing() ) );
2790 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. Here alpha = scalar and beta = 0, so C
// is overwritten. The layout flag follows the storage order of the target type MT3; for
// a column-major target both operands are flagged as transposed.
2792 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2793 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2794 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2795 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
/*!\brief BLAS assignment kernel based on \c cblas_dgemm.
//
// \param C The target matrix; its contents are overwritten.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor, passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-double operands - TODO confirm).
*/
2815 template<
typename MT3
2819 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2820 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2822 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
2828 const int M ( numeric_cast<int>( A.rows() ) );
2829 const int N ( numeric_cast<int>( B.columns() ) );
2830 const int K ( numeric_cast<int>( A.columns() ) );
2831 const int lda( numeric_cast<int>( A.spacing() ) );
2832 const int ldb( numeric_cast<int>( B.spacing() ) );
2833 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. Here alpha = scalar and beta = 0, so C
// is overwritten. The layout flag follows the storage order of the target type MT3; for
// a column-major target both operands are flagged as transposed.
2835 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2836 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2837 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2838 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
/*!\brief BLAS assignment kernel based on \c cblas_cgemm.
//
// \param C The target matrix; its contents are overwritten.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; converted to \c complex<float> and used as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
2858 template<
typename MT3
2862 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2863 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2865 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
2874 const int M ( numeric_cast<int>( A.rows() ) );
2875 const int N ( numeric_cast<int>( B.columns() ) );
2876 const int K ( numeric_cast<int>( A.columns() ) );
2877 const int lda( numeric_cast<int>( A.spacing() ) );
2878 const int ldb( numeric_cast<int>( B.spacing() ) );
2879 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
2880 const complex<float> alpha( scalar );
2881 const complex<float> beta ( 0.0F, 0.0F );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. With beta = 0, C is overwritten. The
// layout flag follows the storage order of the target type MT3; for a column-major
// target both operands are flagged as transposed.
2883 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2884 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2885 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2886 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
/*!\brief BLAS assignment kernel based on \c cblas_zgemm.
//
// \param C The target matrix; its contents are overwritten.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; converted to \c complex<double> and used as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
2906 template<
typename MT3
2910 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2911 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2913 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
2922 const int M ( numeric_cast<int>( A.rows() ) );
2923 const int N ( numeric_cast<int>( B.columns() ) );
2924 const int K ( numeric_cast<int>( A.columns() ) );
2925 const int lda( numeric_cast<int>( A.spacing() ) );
2926 const int ldb( numeric_cast<int>( B.spacing() ) );
2927 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
2928 const complex<double> alpha( scalar );
2929 const complex<double> beta ( 0.0, 0.0 );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. With beta = 0, C is overwritten. The
// layout flag follows the storage order of the target type MT3; for a column-major
// target both operands are flagged as transposed.
2931 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2932 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2933 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2934 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2950 template<
typename MT
2952 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
2956 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2968 const TmpType tmp( rhs );
2985 template<
typename MT3
2987 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2994 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2995 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2997 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3011 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3026 template<
typename MT3
3030 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3031 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3034 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3036 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3051 template<
typename MT3
3055 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3056 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3076 template<
typename MT3
3080 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3081 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3102 template<
typename MT3
3106 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3107 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3109 typedef IntrinsicTrait<ElementType> IT;
3111 const size_t M( A.rows() );
3112 const size_t N( B.columns() );
3113 const size_t K( A.columns() );
3119 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3120 for(
size_t i=0UL; i<M; ++i ) {
3121 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3122 for(
size_t k=0UL; k<K; ++k ) {
3124 xmm1 = xmm1 + a1 * B.load(k,j );
3125 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3126 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3127 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3128 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3129 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3130 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3131 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3133 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3134 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3135 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3136 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3137 (~C).
store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3138 (~C).
store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3139 (~C).
store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3140 (~C).
store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
3143 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3145 for( ; (i+2UL) <= M; i+=2UL ) {
3146 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3147 for(
size_t k=0UL; k<K; ++k ) {
3154 xmm1 = xmm1 + a1 * b1;
3155 xmm2 = xmm2 + a1 * b2;
3156 xmm3 = xmm3 + a1 * b3;
3157 xmm4 = xmm4 + a1 * b4;
3158 xmm5 = xmm5 + a2 * b1;
3159 xmm6 = xmm6 + a2 * b2;
3160 xmm7 = xmm7 + a2 * b3;
3161 xmm8 = xmm8 + a2 * b4;
3163 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3164 (~C).
store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3165 (~C).
store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3166 (~C).
store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3167 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3168 (~C).
store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3169 (~C).
store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3170 (~C).
store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
3174 for(
size_t k=0UL; k<K; ++k ) {
3176 xmm1 = xmm1 + a1 * B.load(k,j );
3177 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3178 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3179 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3181 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3182 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3183 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3184 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3187 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3189 for( ; (i+2UL) <= M; i+=2UL ) {
3191 for(
size_t k=0UL; k<K; ++k ) {
3196 xmm1 = xmm1 + a1 * b1;
3197 xmm2 = xmm2 + a1 * b2;
3198 xmm3 = xmm3 + a2 * b1;
3199 xmm4 = xmm4 + a2 * b2;
3201 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3202 (~C).
store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3203 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3204 (~C).
store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
3208 for(
size_t k=0UL; k<K; ++k ) {
3210 xmm1 = xmm1 + a1 * B.load(k,j );
3211 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3213 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3214 (~C).
store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
3219 for( ; (i+2UL) <= M; i+=2UL ) {
3221 for(
size_t k=0UL; k<K; ++k ) {
3223 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3224 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3226 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3227 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
3231 for(
size_t k=0UL; k<K; ++k ) {
3232 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3234 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
3254 template<
typename MT3
3258 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3259 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3264 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3268 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3272 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
/*!\brief Fallback overload of the BLAS addition assignment kernel.
//
// \param C The target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor.
//
// Participates in overload resolution when \c UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
// It issues no BLAS call; all arguments are forwarded unchanged to
// \c selectDefaultAddAssignKernel().
*/
3297 template<
typename MT3
3301 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3302 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3304 selectDefaultAddAssignKernel( C, A, B, scalar );
/*!\brief BLAS addition assignment kernel based on \c cblas_sgemm.
//
// \param C The target matrix; the scaled product is accumulated into it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor, passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-float operands - TODO confirm).
*/
3323 template<
typename MT3
3327 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3328 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3330 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3336 const int M ( numeric_cast<int>( A.rows() ) );
3337 const int N ( numeric_cast<int>( B.columns() ) );
3338 const int K ( numeric_cast<int>( A.columns() ) );
3339 const int lda( numeric_cast<int>( A.spacing() ) );
3340 const int ldb( numeric_cast<int>( B.spacing() ) );
3341 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. Here alpha = scalar and beta = 1, so
// the existing contents of C are kept and the scaled product is added. The layout flag
// follows the storage order of MT3; a column-major target flags both operands as
// transposed.
3343 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3344 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3345 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3346 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
/*!\brief BLAS addition assignment kernel based on \c cblas_dgemm.
//
// \param C The target matrix; the scaled product is accumulated into it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor, passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-double operands - TODO confirm).
*/
3366 template<
typename MT3
3370 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3371 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3373 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3379 const int M ( numeric_cast<int>( A.rows() ) );
3380 const int N ( numeric_cast<int>( B.columns() ) );
3381 const int K ( numeric_cast<int>( A.columns() ) );
3382 const int lda( numeric_cast<int>( A.spacing() ) );
3383 const int ldb( numeric_cast<int>( B.spacing() ) );
3384 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. Here alpha = scalar and beta = 1, so
// the existing contents of C are kept and the scaled product is added. The layout flag
// follows the storage order of MT3; a column-major target flags both operands as
// transposed.
3386 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3387 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3388 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3389 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
/*!\brief BLAS addition assignment kernel based on \c cblas_cgemm.
//
// \param C The target matrix; the scaled product is accumulated into it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; converted to \c complex<float> and used as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
3409 template<
typename MT3
3413 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3414 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3416 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3425 const int M ( numeric_cast<int>( A.rows() ) );
3426 const int N ( numeric_cast<int>( B.columns() ) );
3427 const int K ( numeric_cast<int>( A.columns() ) );
3428 const int lda( numeric_cast<int>( A.spacing() ) );
3429 const int ldb( numeric_cast<int>( B.spacing() ) );
3430 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
// beta = (1,0) keeps the existing contents of C.
3431 const complex<float> alpha( scalar );
3432 const complex<float> beta ( 1.0F, 0.0F );
// GEMM computes C := alpha*op(A)*op(B) + beta*C, i.e. the scaled product is added to C.
// The layout flag follows the storage order of MT3; a column-major target flags both
// operands as transposed.
3434 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3435 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3436 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3437 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
/*!\brief BLAS addition assignment kernel based on \c cblas_zgemm.
//
// \param C The target matrix; the scaled product is accumulated into it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; converted to \c complex<double> and used as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
3457 template<
typename MT3
3461 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3462 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3464 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3473 const int M ( numeric_cast<int>( A.rows() ) );
3474 const int N ( numeric_cast<int>( B.columns() ) );
3475 const int K ( numeric_cast<int>( A.columns() ) );
3476 const int lda( numeric_cast<int>( A.spacing() ) );
3477 const int ldb( numeric_cast<int>( B.spacing() ) );
3478 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
// beta = (1,0) keeps the existing contents of C.
3479 const complex<double> alpha( scalar );
3480 const complex<double> beta ( 1.0, 0.0 );
// GEMM computes C := alpha*op(A)*op(B) + beta*C, i.e. the scaled product is added to C.
// The layout flag follows the storage order of MT3; a column-major target flags both
// operands as transposed.
3482 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3483 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3484 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3485 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3506 template<
typename MT3
3508 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
3515 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3516 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3518 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3532 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3547 template<
typename MT3
3551 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3552 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3555 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3557 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3572 template<
typename MT3
3576 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3577 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3597 template<
typename MT3
3601 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3602 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3623 template<
typename MT3
3627 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3628 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3630 typedef IntrinsicTrait<ElementType> IT;
3632 const size_t M( A.rows() );
3633 const size_t N( B.columns() );
3634 const size_t K( A.columns() );
3640 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3641 for(
size_t i=0UL; i<M; ++i ) {
3642 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3643 for(
size_t k=0UL; k<K; ++k ) {
3645 xmm1 = xmm1 + a1 * B.load(k,j );
3646 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3647 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3648 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3649 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3650 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3651 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3652 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3654 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3655 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3656 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3657 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3658 (~C).
store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
3659 (~C).
store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
3660 (~C).
store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
3661 (~C).
store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
3664 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3666 for( ; (i+2UL) <= M; i+=2UL ) {
3667 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3668 for(
size_t k=0UL; k<K; ++k ) {
3675 xmm1 = xmm1 + a1 * b1;
3676 xmm2 = xmm2 + a1 * b2;
3677 xmm3 = xmm3 + a1 * b3;
3678 xmm4 = xmm4 + a1 * b4;
3679 xmm5 = xmm5 + a2 * b1;
3680 xmm6 = xmm6 + a2 * b2;
3681 xmm7 = xmm7 + a2 * b3;
3682 xmm8 = xmm8 + a2 * b4;
3684 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3685 (~C).
store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
3686 (~C).
store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
3687 (~C).
store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
3688 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
3689 (~C).
store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
3690 (~C).
store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
3691 (~C).
store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
3695 for(
size_t k=0UL; k<K; ++k ) {
3697 xmm1 = xmm1 + a1 * B.load(k,j );
3698 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3699 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3700 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3702 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3703 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3704 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3705 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3708 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3710 for( ; (i+2UL) <= M; i+=2UL ) {
3712 for(
size_t k=0UL; k<K; ++k ) {
3717 xmm1 = xmm1 + a1 * b1;
3718 xmm2 = xmm2 + a1 * b2;
3719 xmm3 = xmm3 + a2 * b1;
3720 xmm4 = xmm4 + a2 * b2;
3722 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3723 (~C).
store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
3724 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
3725 (~C).
store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
3729 for(
size_t k=0UL; k<K; ++k ) {
3731 xmm1 = xmm1 + a1 * B.load(k,j );
3732 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3734 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3735 (~C).
store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
3740 for( ; (i+2UL) <= M; i+=2UL ) {
3742 for(
size_t k=0UL; k<K; ++k ) {
3744 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3745 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3747 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3748 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
3752 for(
size_t k=0UL; k<K; ++k ) {
3753 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3755 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
3775 template<
typename MT3
3779 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3780 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3785 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3789 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3793 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
/*!\brief Fallback overload of the BLAS subtraction assignment kernel.
//
// \param C The target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor.
//
// Participates in overload resolution when \c UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
// It issues no BLAS call; all arguments are forwarded unchanged to
// \c selectDefaultSubAssignKernel().
*/
3818 template<
typename MT3
3822 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3823 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3825 selectDefaultSubAssignKernel( C, A, B, scalar );
/*!\brief BLAS subtraction assignment kernel based on \c cblas_sgemm.
//
// \param C The target matrix; the scaled product is subtracted from it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; its negation is passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-float operands - TODO confirm).
*/
3844 template<
typename MT3
3848 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3849 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3851 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3857 const int M ( numeric_cast<int>( A.rows() ) );
3858 const int N ( numeric_cast<int>( B.columns() ) );
3859 const int K ( numeric_cast<int>( A.columns() ) );
3860 const int lda( numeric_cast<int>( A.spacing() ) );
3861 const int ldb( numeric_cast<int>( B.spacing() ) );
3862 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. The subtraction is expressed through
// alpha = -scalar with beta = 1, so the scaled product is subtracted from the existing
// contents of C. The layout flag follows the storage order of MT3; a column-major
// target flags both operands as transposed.
3864 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3865 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3866 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3867 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
/*!\brief BLAS subtraction assignment kernel based on \c cblas_dgemm.
//
// \param C The target matrix; the scaled product is subtracted from it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; its negation is passed to GEMM as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>
// holds (trait defined outside this view; presumably all-double operands - TODO confirm).
*/
3887 template<
typename MT3
3891 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3892 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3894 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3900 const int M ( numeric_cast<int>( A.rows() ) );
3901 const int N ( numeric_cast<int>( B.columns() ) );
3902 const int K ( numeric_cast<int>( A.columns() ) );
3903 const int lda( numeric_cast<int>( A.spacing() ) );
3904 const int ldb( numeric_cast<int>( B.spacing() ) );
3905 const int ldc( numeric_cast<int>( C.spacing() ) );
// GEMM computes C := alpha*op(A)*op(B) + beta*C. The subtraction is expressed through
// alpha = -scalar with beta = 1, so the scaled product is subtracted from the existing
// contents of C. The layout flag follows the storage order of MT3; a column-major
// target flags both operands as transposed.
3907 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3908 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3909 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3910 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
/*!\brief BLAS subtraction assignment kernel based on \c cblas_cgemm.
//
// \param C The target matrix; the scaled product is subtracted from it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; negated, converted to \c complex<float>, used as alpha.
//
// Participates in overload resolution when \c UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
3930 template<
typename MT3
3934 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3935 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3937 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3946 const int M ( numeric_cast<int>( A.rows() ) );
3947 const int N ( numeric_cast<int>( B.columns() ) );
3948 const int K ( numeric_cast<int>( A.columns() ) );
3949 const int lda( numeric_cast<int>( A.spacing() ) );
3950 const int ldb( numeric_cast<int>( B.spacing() ) );
3951 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
// alpha carries the negated scalar; beta = (1,0) keeps the existing contents of C.
3952 const complex<float> alpha( -scalar );
3953 const complex<float> beta ( 1.0F, 0.0F );
// GEMM computes C := alpha*op(A)*op(B) + beta*C, i.e. the scaled product is subtracted
// from C. The layout flag follows the storage order of MT3; a column-major target flags
// both operands as transposed.
3955 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3956 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3957 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3958 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
/*!\brief BLAS subtraction assignment kernel based on \c cblas_zgemm.
//
// \param C The target matrix; the scaled product is subtracted from it.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
// \param scalar The scaling factor; negated, converted to \c complex<double>, used as alpha.
//
// Participates in overload resolution when \c UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Unlike the real-valued kernels, the trait is not given the scalar type.
*/
3978 template<
typename MT3
3982 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3983 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3985 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast is Boost's
// range-checked conversion, so an oversized extent is not silently truncated.
3994 const int M ( numeric_cast<int>( A.rows() ) );
3995 const int N ( numeric_cast<int>( B.columns() ) );
3996 const int K ( numeric_cast<int>( A.columns() ) );
3997 const int lda( numeric_cast<int>( A.spacing() ) );
3998 const int ldb( numeric_cast<int>( B.spacing() ) );
3999 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM routines take alpha and beta by address, hence the named locals.
// alpha carries the negated scalar; beta = (1,0) keeps the existing contents of C.
4000 const complex<double> alpha( -scalar );
4001 const complex<double> beta ( 1.0, 0.0 );
// GEMM computes C := alpha*op(A)*op(B) + beta*C, i.e. the scaled product is subtracted
// from C. The layout flag follows the storage order of MT3; a column-major target flags
// both operands as transposed.
4003 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4004 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4005 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4006 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4072 template<
typename T1
4074 inline const DMatDMatMultExpr<T1,T2>
4080 throw std::invalid_argument(
"Matrix sizes do not match" );
4097 template<
typename MT1,
typename MT2,
typename VT >
4102 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4103 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4104 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4105 ,
typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4106 , INVALID_TYPE >::Type Type;
4115 template<
typename MT1,
typename MT2,
typename VT >
4120 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4121 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4122 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4123 ,
typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4124 , INVALID_TYPE >::Type Type;
4133 template<
typename VT,
typename MT1,
typename MT2 >
4138 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4139 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4140 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4141 ,
typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4142 , INVALID_TYPE >::Type Type;
4151 template<
typename VT,
typename MT1,
typename MT2 >
4156 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4157 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4158 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4159 ,
typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4160 , INVALID_TYPE >::Type Type;
4169 template<
typename MT1,
typename MT2,
bool AF >
4174 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4175 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4184 template<
typename MT1,
typename MT2 >
4189 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4198 template<
typename MT1,
typename MT2 >
4203 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:127
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4075
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:124
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:259
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:330
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:246
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:250
Header file for the sparse matrix SMP implementation.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:247
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:256
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:116
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2380
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:125
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:350
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:251
Header file for the dense matrix SMP implementation.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:394
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold. This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:285
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:404
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:248
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:372
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:360
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:300
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:413
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:247
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:253
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:340
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:249
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:265
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:262
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:122
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold. This threshold represents ...
Definition: Thresholds.h:433
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:126
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:123
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:414
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:384
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.