35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
120 template<
typename MT1
152 template<
typename T1,
typename T2,
typename T3 >
153 struct IsEvaluationRequired {
154 enum { value = ( evaluateLeft || evaluateRight ) };
164 template<
typename T1,
typename T2,
typename T3 >
165 struct UseSinglePrecisionKernel {
167 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
168 IsFloat<typename T1::ElementType>::value &&
169 IsFloat<typename T2::ElementType>::value &&
170 IsFloat<typename T3::ElementType>::value };
180 template<
typename T1,
typename T2,
typename T3 >
181 struct UseDoublePrecisionKernel {
183 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184 IsDouble<typename T1::ElementType>::value &&
185 IsDouble<typename T2::ElementType>::value &&
186 IsDouble<typename T3::ElementType>::value };
197 template<
typename T1,
typename T2,
typename T3 >
198 struct UseSinglePrecisionComplexKernel {
199 typedef complex<float> Type;
201 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202 IsSame<typename T1::ElementType,Type>::value &&
203 IsSame<typename T2::ElementType,Type>::value &&
204 IsSame<typename T3::ElementType,Type>::value };
215 template<
typename T1,
typename T2,
typename T3 >
216 struct UseDoublePrecisionComplexKernel {
217 typedef complex<double> Type;
219 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
220 IsSame<typename T1::ElementType,Type>::value &&
221 IsSame<typename T2::ElementType,Type>::value &&
222 IsSame<typename T3::ElementType,Type>::value };
232 template<
typename T1,
typename T2,
typename T3 >
233 struct UseDefaultKernel {
234 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
235 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
236 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
237 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
247 template<
typename T1,
typename T2,
typename T3 >
248 struct UseVectorizedDefaultKernel {
249 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
250 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
251 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
252 IntrinsicTrait<typename T1::ElementType>::addition &&
253 IntrinsicTrait<typename T1::ElementType>::multiplication };
284 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
290 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
291 !evaluateRight && MT2::smpAssignable };
321 if(
lhs_.columns() != 0UL ) {
322 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
324 for(
size_t k=1UL; k<
end; k+=2UL ) {
326 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
328 if( end <
lhs_.columns() ) {
356 return rhs_.columns();
386 template<
typename T >
388 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
398 template<
typename T >
400 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
410 return lhs_.isAligned() &&
rhs_.isAligned();
449 template<
typename MT3
452 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
454 using boost::numeric_cast;
460 const int M ( numeric_cast<int>( A.rows() ) );
461 const int N ( numeric_cast<int>( B.columns() ) );
462 const int K ( numeric_cast<int>( A.columns() ) );
463 const int lda( numeric_cast<int>( A.spacing() ) );
464 const int ldb( numeric_cast<int>( B.spacing() ) );
465 const int ldc( numeric_cast<int>( C.spacing() ) );
468 cblas_ssymm( CblasColMajor, CblasLeft, CblasLower,
469 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
472 cblas_ssymm( CblasRowMajor, CblasRight, CblasUpper,
473 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
479 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
503 template<
typename MT3
506 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
508 using boost::numeric_cast;
514 const int M ( numeric_cast<int>( A.rows() ) );
515 const int N ( numeric_cast<int>( B.columns() ) );
516 const int K ( numeric_cast<int>( A.columns() ) );
517 const int lda( numeric_cast<int>( A.spacing() ) );
518 const int ldb( numeric_cast<int>( B.spacing() ) );
519 const int ldc( numeric_cast<int>( C.spacing() ) );
522 cblas_dsymm( CblasColMajor, CblasLeft, CblasLower,
523 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
525 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
526 cblas_dsymm( CblasRowMajor, CblasRight, CblasUpper,
527 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
530 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
531 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
532 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
533 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
557 template<
typename MT3
560 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
561 complex<float> alpha, complex<float> beta )
563 using boost::numeric_cast;
572 const int M ( numeric_cast<int>( A.rows() ) );
573 const int N ( numeric_cast<int>( B.columns() ) );
574 const int K ( numeric_cast<int>( A.columns() ) );
575 const int lda( numeric_cast<int>( A.spacing() ) );
576 const int ldb( numeric_cast<int>( B.spacing() ) );
577 const int ldc( numeric_cast<int>( C.spacing() ) );
579 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
580 cblas_csymm( CblasColMajor, CblasLeft, CblasLower,
581 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
583 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
584 cblas_csymm( CblasRowMajor, CblasRight, CblasUpper,
585 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
588 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
589 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
590 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
591 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
615 template<
typename MT3
618 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
619 complex<double> alpha, complex<double> beta )
621 using boost::numeric_cast;
630 const int M ( numeric_cast<int>( A.rows() ) );
631 const int N ( numeric_cast<int>( B.columns() ) );
632 const int K ( numeric_cast<int>( A.columns() ) );
633 const int lda( numeric_cast<int>( A.spacing() ) );
634 const int ldb( numeric_cast<int>( B.spacing() ) );
635 const int ldc( numeric_cast<int>( C.spacing() ) );
637 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
638 cblas_zsymm( CblasColMajor, CblasLeft, CblasLower,
639 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
641 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
642 cblas_zsymm( CblasRowMajor, CblasRight, CblasUpper,
643 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
646 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
647 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
648 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
649 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
669 template<
typename MT
678 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
681 else if( rhs.lhs_.columns() == 0UL ) {
696 DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
712 template<
typename MT3
715 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
718 DMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
720 DMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
739 template<
typename MT3
742 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
743 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
745 const size_t M( A.rows() );
746 const size_t N( B.columns() );
747 const size_t K( A.columns() );
749 for(
size_t i=0UL; i<M; ++i ) {
750 for(
size_t j=0UL; j<N; ++j ) {
751 C(i,j) = A(i,0UL) * B(0UL,j);
753 for(
size_t k=1UL; k<K; ++k ) {
754 for(
size_t j=0UL; j<N; ++j ) {
755 C(i,j) += A(i,k) * B(k,j);
777 template<
typename MT3
780 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
781 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
783 typedef IntrinsicTrait<ElementType> IT;
785 const size_t M( A.rows() );
786 const size_t N( B.columns() );
787 const size_t K( A.columns() );
791 for( ; (i+2UL) <= M; i+=2UL ) {
793 for( ; (j+4UL) <= N; j+=4UL ) {
794 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
795 for(
size_t k=0UL; k<K; k+=
IT::size ) {
802 xmm1 = xmm1 + a1 * b1;
803 xmm2 = xmm2 + a1 * b2;
804 xmm3 = xmm3 + a1 * b3;
805 xmm4 = xmm4 + a1 * b4;
806 xmm5 = xmm5 + a2 * b1;
807 xmm6 = xmm6 + a2 * b2;
808 xmm7 = xmm7 + a2 * b3;
809 xmm8 = xmm8 + a2 * b4;
811 (~C)(i ,j ) =
sum( xmm1 );
812 (~C)(i ,j+1UL) =
sum( xmm2 );
813 (~C)(i ,j+2UL) =
sum( xmm3 );
814 (~C)(i ,j+3UL) =
sum( xmm4 );
815 (~C)(i+1UL,j ) =
sum( xmm5 );
816 (~C)(i+1UL,j+1UL) =
sum( xmm6 );
817 (~C)(i+1UL,j+2UL) =
sum( xmm7 );
818 (~C)(i+1UL,j+3UL) =
sum( xmm8 );
820 for( ; (j+2UL) <= N; j+=2UL ) {
822 for(
size_t k=0UL; k<K; k+=
IT::size ) {
827 xmm1 = xmm1 + a1 * b1;
828 xmm2 = xmm2 + a1 * b2;
829 xmm3 = xmm3 + a2 * b1;
830 xmm4 = xmm4 + a2 * b2;
832 (~C)(i ,j ) =
sum( xmm1 );
833 (~C)(i ,j+1UL) =
sum( xmm2 );
834 (~C)(i+1UL,j ) =
sum( xmm3 );
835 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
839 for(
size_t k=0UL; k<K; k+=
IT::size ) {
841 xmm1 = xmm1 + A.load(i ,k) * b1;
842 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
844 (~C)(i ,j) =
sum( xmm1 );
845 (~C)(i+1UL,j) =
sum( xmm2 );
850 for( ; (j+4UL) <= N; j+=4UL ) {
852 for(
size_t k=0UL; k<K; k+=
IT::size ) {
854 xmm1 = xmm1 + a1 * B.load(k,j );
855 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
856 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
857 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
859 (~C)(i,j ) =
sum( xmm1 );
860 (~C)(i,j+1UL) =
sum( xmm2 );
861 (~C)(i,j+2UL) =
sum( xmm3 );
862 (~C)(i,j+3UL) =
sum( xmm4 );
864 for( ; (j+2UL) <= N; j+=2UL ) {
866 for(
size_t k=0UL; k<K; k+=
IT::size ) {
868 xmm1 = xmm1 + a1 * B.load(k,j );
869 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
871 (~C)(i,j ) =
sum( xmm1 );
872 (~C)(i,j+1UL) =
sum( xmm2 );
876 for(
size_t k=0UL; k<K; k+=
IT::size ) {
877 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
879 (~C)(i,j) =
sum( xmm1 );
900 template<
typename MT3
903 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
904 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
906 typedef IntrinsicTrait<ElementType> IT;
908 const size_t M( A.rows() );
909 const size_t N( B.columns() );
910 const size_t K( A.columns() );
914 for( ; (i+4UL) <= M; i+=4UL ) {
916 for( ; (j+2UL) <= N; j+=2UL ) {
917 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
918 for(
size_t k=0UL; k<K; k+=
IT::size ) {
925 xmm1 = xmm1 + a1 * b1;
926 xmm2 = xmm2 + a1 * b2;
927 xmm3 = xmm3 + a2 * b1;
928 xmm4 = xmm4 + a2 * b2;
929 xmm5 = xmm5 + a3 * b1;
930 xmm6 = xmm6 + a3 * b2;
931 xmm7 = xmm7 + a4 * b1;
932 xmm8 = xmm8 + a4 * b2;
934 (~C)(i ,j ) =
sum( xmm1 );
935 (~C)(i ,j+1UL) =
sum( xmm2 );
936 (~C)(i+1UL,j ) =
sum( xmm3 );
937 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
938 (~C)(i+2UL,j ) =
sum( xmm5 );
939 (~C)(i+2UL,j+1UL) =
sum( xmm6 );
940 (~C)(i+3UL,j ) =
sum( xmm7 );
941 (~C)(i+3UL,j+1UL) =
sum( xmm8 );
945 for(
size_t k=0UL; k<K; k+=
IT::size ) {
947 xmm1 = xmm1 + A.load(i ,k) * b1;
948 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
949 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
950 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
952 (~C)(i ,j) =
sum( xmm1 );
953 (~C)(i+1UL,j) =
sum( xmm2 );
954 (~C)(i+2UL,j) =
sum( xmm3 );
955 (~C)(i+3UL,j) =
sum( xmm4 );
958 for( ; (i+2UL) <= M; i+=2UL ) {
960 for( ; (j+2UL) <= N; j+=2UL ) {
962 for(
size_t k=0UL; k<K; k+=
IT::size ) {
967 xmm1 = xmm1 + a1 * b1;
968 xmm2 = xmm2 + a1 * b2;
969 xmm3 = xmm3 + a2 * b1;
970 xmm4 = xmm4 + a2 * b2;
972 (~C)(i ,j ) =
sum( xmm1 );
973 (~C)(i ,j+1UL) =
sum( xmm2 );
974 (~C)(i+1UL,j ) =
sum( xmm3 );
975 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
979 for(
size_t k=0UL; k<K; k+=
IT::size ) {
981 xmm1 = xmm1 + A.load(i ,k) * b1;
982 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
984 (~C)(i ,j) =
sum( xmm1 );
985 (~C)(i+1UL,j) =
sum( xmm2 );
990 for( ; (j+2UL) <= N; j+=2UL ) {
992 for(
size_t k=0UL; k<K; k+=
IT::size ) {
994 xmm1 = xmm1 + a1 * B.load(k,j );
995 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
997 (~C)(i,j ) =
sum( xmm1 );
998 (~C)(i,j+1UL) =
sum( xmm2 );
1002 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1003 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1005 (~C)(i,j) =
sum( xmm1 );
1026 template<
typename MT3
1029 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1030 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1032 selectDefaultAssignKernel( C, A, B );
1052 template<
typename MT3
1055 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1056 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1058 sgemm( C, A, B, 1.0F, 0.0F );
1079 template<
typename MT3
1082 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1083 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1085 dgemm( C, A, B, 1.0, 0.0 );
1106 template<
typename MT3
1109 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1110 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1112 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1133 template<
typename MT3
1136 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1137 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1139 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1158 template<
typename MT
1164 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1176 const TmpType tmp(
serial( rhs ) );
1195 template<
typename MT
1204 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1218 DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1234 template<
typename MT3
1237 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1240 DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1242 DMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1261 template<
typename MT3
1264 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1265 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1267 const size_t M( A.rows() );
1268 const size_t N( B.columns() );
1269 const size_t K( A.columns() );
1272 const size_t end( N &
size_t(-2) );
1274 for(
size_t i=0UL; i<M; ++i ) {
1275 for(
size_t k=0UL; k<K; ++k ) {
1276 for(
size_t j=0UL; j<
end; j+=2UL ) {
1277 C(i,j ) += A(i,k) * B(k,j );
1278 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1281 C(i,end) += A(i,k) * B(k,end);
1303 template<
typename MT3
1306 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1307 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1309 typedef IntrinsicTrait<ElementType> IT;
1311 const size_t M( A.rows() );
1312 const size_t N( B.columns() );
1313 const size_t K( A.columns() );
1317 for( ; (i+2UL) <= M; i+=2UL ) {
1319 for( ; (j+4UL) <= N; j+=4UL ) {
1320 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1321 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1328 xmm1 = xmm1 + a1 * b1;
1329 xmm2 = xmm2 + a1 * b2;
1330 xmm3 = xmm3 + a1 * b3;
1331 xmm4 = xmm4 + a1 * b4;
1332 xmm5 = xmm5 + a2 * b1;
1333 xmm6 = xmm6 + a2 * b2;
1334 xmm7 = xmm7 + a2 * b3;
1335 xmm8 = xmm8 + a2 * b4;
1337 (~C)(i ,j ) +=
sum( xmm1 );
1338 (~C)(i ,j+1UL) +=
sum( xmm2 );
1339 (~C)(i ,j+2UL) +=
sum( xmm3 );
1340 (~C)(i ,j+3UL) +=
sum( xmm4 );
1341 (~C)(i+1UL,j ) +=
sum( xmm5 );
1342 (~C)(i+1UL,j+1UL) +=
sum( xmm6 );
1343 (~C)(i+1UL,j+2UL) +=
sum( xmm7 );
1344 (~C)(i+1UL,j+3UL) +=
sum( xmm8 );
1346 for( ; (j+2UL) <= N; j+=2UL ) {
1348 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1353 xmm1 = xmm1 + a1 * b1;
1354 xmm2 = xmm2 + a1 * b2;
1355 xmm3 = xmm3 + a2 * b1;
1356 xmm4 = xmm4 + a2 * b2;
1358 (~C)(i ,j ) +=
sum( xmm1 );
1359 (~C)(i ,j+1UL) +=
sum( xmm2 );
1360 (~C)(i+1UL,j ) +=
sum( xmm3 );
1361 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1365 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1367 xmm1 = xmm1 + A.load(i ,k) * b1;
1368 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1370 (~C)(i ,j) +=
sum( xmm1 );
1371 (~C)(i+1UL,j) +=
sum( xmm2 );
1376 for( ; (j+4UL) <= N; j+=4UL ) {
1378 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1380 xmm1 = xmm1 + a1 * B.load(k,j );
1381 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1382 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1383 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1385 (~C)(i,j ) +=
sum( xmm1 );
1386 (~C)(i,j+1UL) +=
sum( xmm2 );
1387 (~C)(i,j+2UL) +=
sum( xmm3 );
1388 (~C)(i,j+3UL) +=
sum( xmm4 );
1390 for( ; (j+2UL) <= N; j+=2UL ) {
1392 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1394 xmm1 = xmm1 + a1 * B.load(k,j );
1395 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1397 (~C)(i,j ) +=
sum( xmm1 );
1398 (~C)(i,j+1UL) +=
sum( xmm2 );
1402 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1403 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1405 (~C)(i,j) +=
sum( xmm1 );
1426 template<
typename MT3
1429 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1430 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1432 typedef IntrinsicTrait<ElementType> IT;
1434 const size_t M( A.rows() );
1435 const size_t N( B.columns() );
1436 const size_t K( A.columns() );
1440 for( ; (i+4UL) <= M; i+=4UL ) {
1442 for( ; (j+2UL) <= N; j+=2UL ) {
1443 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1444 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1451 xmm1 = xmm1 + a1 * b1;
1452 xmm2 = xmm2 + a1 * b2;
1453 xmm3 = xmm3 + a2 * b1;
1454 xmm4 = xmm4 + a2 * b2;
1455 xmm5 = xmm5 + a3 * b1;
1456 xmm6 = xmm6 + a3 * b2;
1457 xmm7 = xmm7 + a4 * b1;
1458 xmm8 = xmm8 + a4 * b2;
1460 (~C)(i ,j ) +=
sum( xmm1 );
1461 (~C)(i ,j+1UL) +=
sum( xmm2 );
1462 (~C)(i+1UL,j ) +=
sum( xmm3 );
1463 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1464 (~C)(i+2UL,j ) +=
sum( xmm5 );
1465 (~C)(i+2UL,j+1UL) +=
sum( xmm6 );
1466 (~C)(i+3UL,j ) +=
sum( xmm7 );
1467 (~C)(i+3UL,j+1UL) +=
sum( xmm8 );
1471 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1473 xmm1 = xmm1 + A.load(i ,k) * b1;
1474 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1475 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1476 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1478 (~C)(i ,j) +=
sum( xmm1 );
1479 (~C)(i+1UL,j) +=
sum( xmm2 );
1480 (~C)(i+2UL,j) +=
sum( xmm3 );
1481 (~C)(i+3UL,j) +=
sum( xmm4 );
1484 for( ; (i+2UL) <= M; i+=2UL ) {
1486 for( ; (j+2UL) <= N; j+=2UL ) {
1488 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1493 xmm1 = xmm1 + a1 * b1;
1494 xmm2 = xmm2 + a1 * b2;
1495 xmm3 = xmm3 + a2 * b1;
1496 xmm4 = xmm4 + a2 * b2;
1498 (~C)(i ,j ) +=
sum( xmm1 );
1499 (~C)(i ,j+1UL) +=
sum( xmm2 );
1500 (~C)(i+1UL,j ) +=
sum( xmm3 );
1501 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1505 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1507 xmm1 = xmm1 + A.load(i ,k) * b1;
1508 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1510 (~C)(i ,j) +=
sum( xmm1 );
1511 (~C)(i+1UL,j) +=
sum( xmm2 );
1516 for( ; (j+2UL) <= N; j+=2UL ) {
1518 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1520 xmm1 = xmm1 + a1 * B.load(k,j );
1521 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1523 (~C)(i,j ) +=
sum( xmm1 );
1524 (~C)(i,j+1UL) +=
sum( xmm2 );
1528 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1529 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1531 (~C)(i,j) +=
sum( xmm1 );
1552 template<
typename MT3
1555 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1556 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1558 selectDefaultAddAssignKernel( C, A, B );
1578 template<
typename MT3
1581 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1582 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1584 sgemm( C, A, B, 1.0F, 1.0F );
1605 template<
typename MT3
1608 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1609 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1611 dgemm( C, A, B, 1.0, 1.0 );
1632 template<
typename MT3
1635 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1636 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1638 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1659 template<
typename MT3
1662 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1663 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1665 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1688 template<
typename MT
1697 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1711 DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1727 template<
typename MT3
1730 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1733 DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1735 DMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1754 template<
typename MT3
1757 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1758 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1760 const size_t M( A.rows() );
1761 const size_t N( B.columns() );
1762 const size_t K( A.columns() );
1765 const size_t end( N &
size_t(-2) );
1767 for(
size_t i=0UL; i<M; ++i ) {
1768 for(
size_t k=0UL; k<K; ++k ) {
1769 for(
size_t j=0UL; j<
end; j+=2UL ) {
1770 C(i,j ) -= A(i,k) * B(k,j );
1771 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1774 C(i,end) -= A(i,k) * B(k,end);
1796 template<
typename MT3
1799 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1800 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1802 typedef IntrinsicTrait<ElementType> IT;
1804 const size_t M( A.rows() );
1805 const size_t N( B.columns() );
1806 const size_t K( A.columns() );
1810 for( ; (i+2UL) <= M; i+=2UL ) {
1812 for( ; (j+4UL) <= N; j+=4UL ) {
1813 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1814 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1821 xmm1 = xmm1 + a1 * b1;
1822 xmm2 = xmm2 + a1 * b2;
1823 xmm3 = xmm3 + a1 * b3;
1824 xmm4 = xmm4 + a1 * b4;
1825 xmm5 = xmm5 + a2 * b1;
1826 xmm6 = xmm6 + a2 * b2;
1827 xmm7 = xmm7 + a2 * b3;
1828 xmm8 = xmm8 + a2 * b4;
1830 (~C)(i ,j ) -=
sum( xmm1 );
1831 (~C)(i ,j+1UL) -=
sum( xmm2 );
1832 (~C)(i ,j+2UL) -=
sum( xmm3 );
1833 (~C)(i ,j+3UL) -=
sum( xmm4 );
1834 (~C)(i+1UL,j ) -=
sum( xmm5 );
1835 (~C)(i+1UL,j+1UL) -=
sum( xmm6 );
1836 (~C)(i+1UL,j+2UL) -=
sum( xmm7 );
1837 (~C)(i+1UL,j+3UL) -=
sum( xmm8 );
1839 for( ; (j+2UL) <= N; j+=2UL ) {
1841 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1846 xmm1 = xmm1 + a1 * b1;
1847 xmm2 = xmm2 + a1 * b2;
1848 xmm3 = xmm3 + a2 * b1;
1849 xmm4 = xmm4 + a2 * b2;
1851 (~C)(i ,j ) -=
sum( xmm1 );
1852 (~C)(i ,j+1UL) -=
sum( xmm2 );
1853 (~C)(i+1UL,j ) -=
sum( xmm3 );
1854 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1858 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1860 xmm1 = xmm1 + A.load(i ,k) * b1;
1861 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1863 (~C)(i ,j) -=
sum( xmm1 );
1864 (~C)(i+1UL,j) -=
sum( xmm2 );
1869 for( ; (j+4UL) <= N; j+=4UL ) {
1871 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1873 xmm1 = xmm1 + a1 * B.load(k,j );
1874 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1875 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1876 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1878 (~C)(i,j ) -=
sum( xmm1 );
1879 (~C)(i,j+1UL) -=
sum( xmm2 );
1880 (~C)(i,j+2UL) -=
sum( xmm3 );
1881 (~C)(i,j+3UL) -=
sum( xmm4 );
1883 for( ; (j+2UL) <= N; j+=2UL ) {
1885 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1887 xmm1 = xmm1 + a1 * B.load(k,j );
1888 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1890 (~C)(i,j ) -=
sum( xmm1 );
1891 (~C)(i,j+1UL) -=
sum( xmm2 );
1895 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1896 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1898 (~C)(i,j) -=
sum( xmm1 );
1919 template<
typename MT3
1922 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1923 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1925 typedef IntrinsicTrait<ElementType> IT;
1927 const size_t M( A.rows() );
1928 const size_t N( B.columns() );
1929 const size_t K( A.columns() );
1933 for( ; (i+4UL) <= M; i+=4UL ) {
1935 for( ; (j+2UL) <= N; j+=2UL ) {
1936 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1937 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1944 xmm1 = xmm1 + a1 * b1;
1945 xmm2 = xmm2 + a1 * b2;
1946 xmm3 = xmm3 + a2 * b1;
1947 xmm4 = xmm4 + a2 * b2;
1948 xmm5 = xmm5 + a3 * b1;
1949 xmm6 = xmm6 + a3 * b2;
1950 xmm7 = xmm7 + a4 * b1;
1951 xmm8 = xmm8 + a4 * b2;
1953 (~C)(i ,j ) -=
sum( xmm1 );
1954 (~C)(i ,j+1UL) -=
sum( xmm2 );
1955 (~C)(i+1UL,j ) -=
sum( xmm3 );
1956 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1957 (~C)(i+2UL,j ) -=
sum( xmm5 );
1958 (~C)(i+2UL,j+1UL) -=
sum( xmm6 );
1959 (~C)(i+3UL,j ) -=
sum( xmm7 );
1960 (~C)(i+3UL,j+1UL) -=
sum( xmm8 );
1964 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1966 xmm1 = xmm1 + A.load(i ,k) * b1;
1967 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1968 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1969 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1971 (~C)(i ,j) -=
sum( xmm1 );
1972 (~C)(i+1UL,j) -=
sum( xmm2 );
1973 (~C)(i+2UL,j) -=
sum( xmm3 );
1974 (~C)(i+3UL,j) -=
sum( xmm4 );
1977 for( ; (i+2UL) <= M; i+=2UL ) {
1979 for( ; (j+2UL) <= N; j+=2UL ) {
1981 for(
size_t k=0UL; k<K; k+=
IT::size ) {
1986 xmm1 = xmm1 + a1 * b1;
1987 xmm2 = xmm2 + a1 * b2;
1988 xmm3 = xmm3 + a2 * b1;
1989 xmm4 = xmm4 + a2 * b2;
1991 (~C)(i ,j ) -=
sum( xmm1 );
1992 (~C)(i ,j+1UL) -=
sum( xmm2 );
1993 (~C)(i+1UL,j ) -=
sum( xmm3 );
1994 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1998 for(
size_t k=0UL; k<K; k+=
IT::size ) {
2000 xmm1 = xmm1 + A.load(i ,k) * b1;
2001 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2003 (~C)(i ,j) -=
sum( xmm1 );
2004 (~C)(i+1UL,j) -=
sum( xmm2 );
2009 for( ; (j+2UL) <= N; j+=2UL ) {
2011 for(
size_t k=0UL; k<K; k+=
IT::size ) {
2013 xmm1 = xmm1 + a1 * B.load(k,j );
2014 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2016 (~C)(i,j ) -=
sum( xmm1 );
2017 (~C)(i,j+1UL) -=
sum( xmm2 );
2021 for(
size_t k=0UL; k<K; k+=
IT::size ) {
2022 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2024 (~C)(i,j) -=
sum( xmm1 );
2045 template<
typename MT3
2048 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2049 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2051 selectDefaultSubAssignKernel( C, A, B );
2071 template<
typename MT3
2074 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2075 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2077 sgemm( C, A, B, -1.0F, 1.0F );
2098 template<
typename MT3
2101 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2102 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2104 dgemm( C, A, B, -1.0, 1.0 );
2125 template<
typename MT3
2128 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2129 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2131 cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2152 template<
typename MT3
2155 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2156 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2158 zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2191 template<
typename MT
2193 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2201 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2204 else if( rhs.lhs_.columns() == 0UL ) {
2239 template<
typename MT
2241 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2246 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2258 const TmpType tmp( rhs );
2280 template<
typename MT
2282 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2290 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2329 template<
typename MT
2331 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2339 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2399 template<
typename MT1
2403 :
public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2404 ,
private MatScalarMultExpr
2405 ,
private Computation
2409 typedef DMatTDMatMultExpr<MT1,MT2> MMM;
2421 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2426 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2434 template<
typename T1,
typename T2,
typename T3 >
2435 struct IsEvaluationRequired {
2436 enum { value = ( evaluateLeft || evaluateRight ) };
2445 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2446 struct UseSinglePrecisionKernel {
2448 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2449 IsFloat<typename T1::ElementType>::value &&
2450 IsFloat<typename T2::ElementType>::value &&
2451 IsFloat<typename T3::ElementType>::value &&
2452 !IsComplex<T4>::value };
2461 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2462 struct UseDoublePrecisionKernel {
2464 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2465 IsDouble<typename T1::ElementType>::value &&
2466 IsDouble<typename T2::ElementType>::value &&
2467 IsDouble<typename T3::ElementType>::value &&
2468 !IsComplex<T4>::value };
2477 template<
typename T1,
typename T2,
typename T3 >
2478 struct UseSinglePrecisionComplexKernel {
2479 typedef complex<float> Type;
2481 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2482 IsSame<typename T1::ElementType,Type>::value &&
2483 IsSame<typename T2::ElementType,Type>::value &&
2484 IsSame<typename T3::ElementType,Type>::value };
2493 template<
typename T1,
typename T2,
typename T3 >
2494 struct UseDoublePrecisionComplexKernel {
2495 typedef complex<double> Type;
2497 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2498 IsSame<typename T1::ElementType,Type>::value &&
2499 IsSame<typename T2::ElementType,Type>::value &&
2500 IsSame<typename T3::ElementType,Type>::value };
2508 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2509 struct UseDefaultKernel {
2510 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2511 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2512 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2513 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2521 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2522 struct UseVectorizedDefaultKernel {
2523 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2524 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2525 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2526 IsSame<typename T1::ElementType,T4>::value &&
2527 IntrinsicTrait<typename T1::ElementType>::addition &&
2528 IntrinsicTrait<typename T1::ElementType>::multiplication };
2534 typedef DMatScalarMultExpr<MMM,ST,false>
This;
2535 typedef typename MultTrait<RES,ST>::Type
ResultType;
2539 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2544 typedef const DMatTDMatMultExpr<MT1,MT2>
LeftOperand;
2550 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2553 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
2558 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2559 IsSame<ET1,ET2>::value &&
2560 IsSame<ET1,ST>::value &&
2561 IntrinsicTrait<ET1>::addition &&
2562 IntrinsicTrait<ET1>::multiplication };
2565 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2566 !evaluateRight && MT2::smpAssignable };
// Constructor taking the wrapped matrix multiplication expression and the
// scalar factor. The initializer list and body are not part of this excerpt.
2575 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
2591 return matrix_(i,j) * scalar_;
// Returns the number of rows, forwarded from the wrapped multiplication expression.
2600 inline size_t rows()
const {
2601 return matrix_.rows();
// Returns the number of columns, forwarded from the wrapped multiplication expression.
2610 inline size_t columns()
const {
2611 return matrix_.columns();
// Returns whether the expression can alias with the given address; the query
// is delegated unchanged to the wrapped multiplication expression.
2641 template<
typename T >
2642 inline bool canAlias(
const T* alias )
const {
2643 return matrix_.canAlias( alias );
// Returns whether the expression is aliased with the given address; the query
// is delegated unchanged to the wrapped multiplication expression.
2653 template<
typename T >
2654 inline bool isAliased(
const T* alias )
const {
2655 return matrix_.isAliased( alias );
2665 return matrix_.isAligned();
2675 typename MMM::LeftOperand A( matrix_.leftOperand() );
// Single precision BLAS wrapper: forwards C, A, B together with the scaling
// factors alpha and beta to cblas_ssymm or cblas_sgemm.
// Parts of the template header, the constraints and the closing braces /
// final `else` are not part of this excerpt.
2704 template<
typename MT3
2707 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
2709 using boost::numeric_cast;
// BLAS takes `int` dimensions; numeric_cast is used for the size_t -> int
// conversions. The leading dimensions are taken from spacing().
2715 const int M ( numeric_cast<int>( A.rows() ) );
2716 const int N ( numeric_cast<int>( B.columns() ) );
2717 const int K ( numeric_cast<int>( A.columns() ) );
2718 const int lda( numeric_cast<int>( A.spacing() ) );
2719 const int ldb( numeric_cast<int>( B.spacing() ) );
2720 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric left operand with a column-major target: symm with A on the left.
2722 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2723 cblas_ssymm( CblasColMajor, CblasLeft, CblasLower,
2724 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric right operand with a row-major target: symm with B passed as the
// symmetric matrix on the right side.
2726 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2727 cblas_ssymm( CblasRowMajor, CblasRight, CblasUpper,
2728 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General case: the storage order passed to gemm follows C; for a row-major C
// the operand B is flagged as transposed, otherwise the operand A is.
2731 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2732 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2733 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2734 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Double precision BLAS wrapper: forwards C, A, B together with the scaling
// factors alpha and beta to cblas_dsymm or cblas_dgemm.
// Parts of the template header, the constraints and the closing braces /
// final `else` are not part of this excerpt.
2756 template<
typename MT3
2759 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
2761 using boost::numeric_cast;
// BLAS takes `int` dimensions; numeric_cast is used for the size_t -> int
// conversions. The leading dimensions are taken from spacing().
2767 const int M ( numeric_cast<int>( A.rows() ) );
2768 const int N ( numeric_cast<int>( B.columns() ) );
2769 const int K ( numeric_cast<int>( A.columns() ) );
2770 const int lda( numeric_cast<int>( A.spacing() ) );
2771 const int ldb( numeric_cast<int>( B.spacing() ) );
2772 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric left operand with a column-major target: symm with A on the left.
2774 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2775 cblas_dsymm( CblasColMajor, CblasLeft, CblasLower,
2776 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric right operand with a row-major target: symm with B passed as the
// symmetric matrix on the right side.
2778 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2779 cblas_dsymm( CblasRowMajor, CblasRight, CblasUpper,
2780 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General case: the storage order passed to gemm follows C; for a row-major C
// the operand B is flagged as transposed, otherwise the operand A is.
2783 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2784 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2785 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2786 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Single precision complex BLAS wrapper: forwards C, A, B together with the
// complex scaling factors to cblas_csymm or cblas_cgemm. Unlike the real
// variants, alpha and beta are handed over by address.
// Parts of the template header, the constraints and the closing braces /
// final `else` are not part of this excerpt.
2808 template<
typename MT3
2811 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
2812 complex<float> alpha, complex<float> beta )
2814 using boost::numeric_cast;
// BLAS takes `int` dimensions; numeric_cast is used for the size_t -> int
// conversions. The leading dimensions are taken from spacing().
2823 const int M ( numeric_cast<int>( A.rows() ) );
2824 const int N ( numeric_cast<int>( B.columns() ) );
2825 const int K ( numeric_cast<int>( A.columns() ) );
2826 const int lda( numeric_cast<int>( A.spacing() ) );
2827 const int ldb( numeric_cast<int>( B.spacing() ) );
2828 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric left operand with a column-major target: symm with A on the left.
2830 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2831 cblas_csymm( CblasColMajor, CblasLeft, CblasLower,
2832 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric right operand with a row-major target: symm with B passed as the
// symmetric matrix on the right side.
2834 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2835 cblas_csymm( CblasRowMajor, CblasRight, CblasUpper,
2836 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General case: the storage order passed to gemm follows C; for a row-major C
// the operand B is flagged as transposed, otherwise the operand A is.
2839 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2840 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2841 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2842 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double precision complex BLAS wrapper: forwards C, A, B together with the
// complex scaling factors to cblas_zsymm or cblas_zgemm. Unlike the real
// variants, alpha and beta are handed over by address.
// Parts of the template header, the constraints and the closing braces /
// final `else` are not part of this excerpt.
2864 template<
typename MT3
2867 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
2868 complex<double> alpha, complex<double> beta )
2870 using boost::numeric_cast;
// BLAS takes `int` dimensions; numeric_cast is used for the size_t -> int
// conversions. The leading dimensions are taken from spacing().
2879 const int M ( numeric_cast<int>( A.rows() ) );
2880 const int N ( numeric_cast<int>( B.columns() ) );
2881 const int K ( numeric_cast<int>( A.columns() ) );
2882 const int lda( numeric_cast<int>( A.spacing() ) );
2883 const int ldb( numeric_cast<int>( B.spacing() ) );
2884 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric left operand with a column-major target: symm with A on the left.
2886 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2887 cblas_zsymm( CblasColMajor, CblasLeft, CblasLower,
2888 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric right operand with a row-major target: symm with B passed as the
// symmetric matrix on the right side.
2890 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2891 cblas_zsymm( CblasRowMajor, CblasRight, CblasUpper,
2892 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General case: the storage order passed to gemm follows C; for a row-major C
// the operand B is flagged as transposed, otherwise the operand A is.
2895 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2896 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2897 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2898 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of the scaled matrix multiplication to a dense matrix.
// The bodies of the two special-case branches and the definitions of A and B
// are not part of this excerpt.
2916 template<
typename MT
2918 friend inline void assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
2925 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2926 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: the target has no rows or no columns.
2928 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension of the product is zero.
2931 else if( left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection, passing the scalar factor.
2946 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the assignment: forwards either to the default kernel
// or to the BLAS kernel selection. The condition deciding between the two
// calls is not part of this excerpt.
2961 template<
typename MT3
2965 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2968 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2970 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
// Non-vectorized default assignment kernel; only participates in overload
// resolution when UseVectorizedDefaultKernel does not apply.
// Each row of C is first initialised from the k-index-0 products, then the
// remaining products are accumulated. The body of the final loop is not part
// of this excerpt (presumably the scaling by `scalar` - confirm).
2988 template<
typename MT3
2992 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2993 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2995 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialise row i of C with the first term of each dot product; this reads
// A(i,0) and B(0,k) and therefore relies on a non-zero inner dimension.
2996 for(
size_t k=0UL; k<B.columns(); ++k ) {
2997 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the remaining terms, starting at inner index 1.
2999 for(
size_t j=1UL; j<A.columns(); ++j ) {
3000 for(
size_t k=0UL; k<B.columns(); ++k ) {
3001 C(i,k) += A(i,j) * B(j,k);
// Final pass over row i (loop body not shown in this excerpt).
3004 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default assignment kernel for a row-major target
// (DenseMatrix<MT3,false>), enabled via UseVectorizedDefaultKernel.
// C is processed in register tiles; for every tile, intrinsic accumulators
// are updated over the inner index k in steps of IT::size, reduced with sum()
// and the scaled result is assigned to C.
// The loads of a1/a2/b1..b4, the index declarations and the closing braces
// are not part of this excerpt.
3025 template<
typename MT3
3029 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3030 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3032 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
3034 const size_t M( A.rows() );
3035 const size_t N( B.columns() );
3036 const size_t K( A.columns() );
// Pairs of rows.
3040 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 4 columns (eight accumulators).
3042 for( ; (j+4UL) <= N; j+=4UL ) {
3043 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3044 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3051 xmm1 = xmm1 + a1 * b1;
3052 xmm2 = xmm2 + a1 * b2;
3053 xmm3 = xmm3 + a1 * b3;
3054 xmm4 = xmm4 + a1 * b4;
3055 xmm5 = xmm5 + a2 * b1;
3056 xmm6 = xmm6 + a2 * b2;
3057 xmm7 = xmm7 + a2 * b3;
3058 xmm8 = xmm8 + a2 * b4;
// Reduce each accumulator and store the scaled element.
3060 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
3061 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
3062 (~C)(i ,j+2UL) =
sum( xmm3 ) * scalar;
3063 (~C)(i ,j+3UL) =
sum( xmm4 ) * scalar;
3064 (~C)(i+1UL,j ) =
sum( xmm5 ) * scalar;
3065 (~C)(i+1UL,j+1UL) =
sum( xmm6 ) * scalar;
3066 (~C)(i+1UL,j+2UL) =
sum( xmm7 ) * scalar;
3067 (~C)(i+1UL,j+3UL) =
sum( xmm8 ) * scalar;
// Tile of 2 rows x 2 columns.
3069 for( ; (j+2UL) <= N; j+=2UL ) {
3071 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3076 xmm1 = xmm1 + a1 * b1;
3077 xmm2 = xmm2 + a1 * b2;
3078 xmm3 = xmm3 + a2 * b1;
3079 xmm4 = xmm4 + a2 * b2;
3081 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
3082 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
3083 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
3084 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
3088 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3090 xmm1 = xmm1 + A.load(i ,k) * b1;
3091 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3093 (~C)(i ,j) =
sum( xmm1 ) * scalar;
3094 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 4 columns.
3099 for( ; (j+4UL) <= N; j+=4UL ) {
3101 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3103 xmm1 = xmm1 + a1 * B.load(k,j );
3104 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3105 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3106 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3108 (~C)(i,j ) =
sum( xmm1 ) * scalar;
3109 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
3110 (~C)(i,j+2UL) =
sum( xmm3 ) * scalar;
3111 (~C)(i,j+3UL) =
sum( xmm4 ) * scalar;
// 1 row x 2 columns.
3113 for( ; (j+2UL) <= N; j+=2UL ) {
3115 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3117 xmm1 = xmm1 + a1 * B.load(k,j );
3118 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3120 (~C)(i,j ) =
sum( xmm1 ) * scalar;
3121 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Single element.
3125 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3126 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3128 (~C)(i,j) =
sum( xmm1 ) * scalar;
// Vectorized default assignment kernel for a column-major target
// (DenseMatrix<MT3,true>), enabled via UseVectorizedDefaultKernel.
// Same scheme as the row-major variant but with tiles that are taller than
// wide: intrinsic accumulators are updated over k in steps of IT::size,
// reduced with sum() and the scaled result is assigned to C.
// The loads of a1..a4/b1/b2, the index declarations and the closing braces
// are not part of this excerpt.
3148 template<
typename MT3
3152 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3153 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3155 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
3157 const size_t M( A.rows() );
3158 const size_t N( B.columns() );
3159 const size_t K( A.columns() );
// Groups of four rows.
3163 for( ; (i+4UL) <= M; i+=4UL ) {
// Tile of 4 rows x 2 columns (eight accumulators).
3165 for( ; (j+2UL) <= N; j+=2UL ) {
3166 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3167 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3174 xmm1 = xmm1 + a1 * b1;
3175 xmm2 = xmm2 + a1 * b2;
3176 xmm3 = xmm3 + a2 * b1;
3177 xmm4 = xmm4 + a2 * b2;
3178 xmm5 = xmm5 + a3 * b1;
3179 xmm6 = xmm6 + a3 * b2;
3180 xmm7 = xmm7 + a4 * b1;
3181 xmm8 = xmm8 + a4 * b2;
// Reduce each accumulator and store the scaled element.
3183 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
3184 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
3185 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
3186 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
3187 (~C)(i+2UL,j ) =
sum( xmm5 ) * scalar;
3188 (~C)(i+2UL,j+1UL) =
sum( xmm6 ) * scalar;
3189 (~C)(i+3UL,j ) =
sum( xmm7 ) * scalar;
3190 (~C)(i+3UL,j+1UL) =
sum( xmm8 ) * scalar;
// Tile of 4 rows x 1 column.
3194 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3196 xmm1 = xmm1 + A.load(i ,k) * b1;
3197 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3198 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3199 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3201 (~C)(i ,j) =
sum( xmm1 ) * scalar;
3202 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
3203 (~C)(i+2UL,j) =
sum( xmm3 ) * scalar;
3204 (~C)(i+3UL,j) =
sum( xmm4 ) * scalar;
// Pairs of rows.
3207 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 2 columns.
3209 for( ; (j+2UL) <= N; j+=2UL ) {
3211 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3216 xmm1 = xmm1 + a1 * b1;
3217 xmm2 = xmm2 + a1 * b2;
3218 xmm3 = xmm3 + a2 * b1;
3219 xmm4 = xmm4 + a2 * b2;
3221 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
3222 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
3223 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
3224 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
3228 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3230 xmm1 = xmm1 + A.load(i ,k) * b1;
3231 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3233 (~C)(i ,j) =
sum( xmm1 ) * scalar;
3234 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 2 columns.
3239 for( ; (j+2UL) <= N; j+=2UL ) {
3241 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3243 xmm1 = xmm1 + a1 * B.load(k,j );
3244 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3246 (~C)(i,j ) =
sum( xmm1 ) * scalar;
3247 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Single element.
3251 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3252 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3254 (~C)(i,j) =
sum( xmm1 ) * scalar;
// BLAS kernel selection, fallback overload: enabled when UseDefaultKernel
// applies and simply forwards to the default assignment kernel.
3274 template<
typename MT3
3278 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3279 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3281 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS kernel selection, single precision overload: enabled via
// UseSinglePrecisionKernel. Calls sgemm with alpha = scalar and beta = 0, so
// the previous content of C does not contribute to the result.
3300 template<
typename MT3
3304 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3305 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3307 sgemm( C, A, B, scalar, 0.0F );
// BLAS kernel selection, double precision overload: enabled via
// UseDoublePrecisionKernel. Calls dgemm with alpha = scalar and beta = 0, so
// the previous content of C does not contribute to the result.
3327 template<
typename MT3
3331 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3332 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3334 dgemm( C, A, B, scalar, 0.0 );
// BLAS kernel selection, single precision complex overload: enabled via
// UseSinglePrecisionComplexKernel (which does not inspect the scalar type).
// The scalar becomes the real part of alpha; beta is complex zero.
3354 template<
typename MT3
3358 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3359 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3361 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS kernel selection, double precision complex overload: enabled via
// UseDoublePrecisionComplexKernel (which does not inspect the scalar type).
// The scalar becomes the real part of alpha; beta is complex zero.
3381 template<
typename MT3
3385 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3386 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3388 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of the scaled matrix multiplication to a sparse matrix.
// The expression is first evaluated into a temporary; the statements that
// transfer the temporary to lhs are not part of this excerpt.
3405 template<
typename MT
3407 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type chosen by the storage order SO of the target between
// OppositeType and ResultType.
3411 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// The temporary is built from serial( rhs ).
3423 const TmpType tmp(
serial( rhs ) );
// Addition assignment of the scaled matrix multiplication to a dense matrix.
// The body of the early-exit branch and the definitions of A and B are not
// part of this excerpt.
3440 template<
typename MT
3442 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
3449 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3450 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
3452 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection, passing the scalar factor.
3466 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the addition assignment: forwards either to the
// default kernel or to the BLAS kernel selection. The condition deciding
// between the two calls is not part of this excerpt.
3481 template<
typename MT3
3485 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3488 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3490 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
// Non-vectorized default addition assignment kernel; only participates in
// overload resolution when UseVectorizedDefaultKernel does not apply.
// The function body is not part of this excerpt.
3508 template<
typename MT3
3512 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3513 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default addition assignment kernel for a row-major target
// (DenseMatrix<MT3,false>), enabled via UseVectorizedDefaultKernel.
// C is processed in register tiles; for every tile, intrinsic accumulators
// are updated over the inner index k in steps of IT::size, reduced with sum()
// and the scaled result is added to C.
// The loads of a1/a2/b1..b4, the index declarations and the closing braces
// are not part of this excerpt.
3534 template<
typename MT3
3538 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3539 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3541 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
3543 const size_t M( A.rows() );
3544 const size_t N( B.columns() );
3545 const size_t K( A.columns() );
// Pairs of rows.
3549 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 4 columns (eight accumulators).
3551 for( ; (j+4UL) <= N; j+=4UL ) {
3552 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3553 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3560 xmm1 = xmm1 + a1 * b1;
3561 xmm2 = xmm2 + a1 * b2;
3562 xmm3 = xmm3 + a1 * b3;
3563 xmm4 = xmm4 + a1 * b4;
3564 xmm5 = xmm5 + a2 * b1;
3565 xmm6 = xmm6 + a2 * b2;
3566 xmm7 = xmm7 + a2 * b3;
3567 xmm8 = xmm8 + a2 * b4;
// Reduce each accumulator and add the scaled element to C.
3569 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3570 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3571 (~C)(i ,j+2UL) +=
sum( xmm3 ) * scalar;
3572 (~C)(i ,j+3UL) +=
sum( xmm4 ) * scalar;
3573 (~C)(i+1UL,j ) +=
sum( xmm5 ) * scalar;
3574 (~C)(i+1UL,j+1UL) +=
sum( xmm6 ) * scalar;
3575 (~C)(i+1UL,j+2UL) +=
sum( xmm7 ) * scalar;
3576 (~C)(i+1UL,j+3UL) +=
sum( xmm8 ) * scalar;
// Tile of 2 rows x 2 columns.
3578 for( ; (j+2UL) <= N; j+=2UL ) {
3580 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3585 xmm1 = xmm1 + a1 * b1;
3586 xmm2 = xmm2 + a1 * b2;
3587 xmm3 = xmm3 + a2 * b1;
3588 xmm4 = xmm4 + a2 * b2;
3590 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3591 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3592 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3593 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
3597 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3599 xmm1 = xmm1 + A.load(i ,k) * b1;
3600 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3602 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3603 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 4 columns.
3608 for( ; (j+4UL) <= N; j+=4UL ) {
3610 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3612 xmm1 = xmm1 + a1 * B.load(k,j );
3613 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3614 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3615 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3617 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3618 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3619 (~C)(i,j+2UL) +=
sum( xmm3 ) * scalar;
3620 (~C)(i,j+3UL) +=
sum( xmm4 ) * scalar;
// 1 row x 2 columns.
3622 for( ; (j+2UL) <= N; j+=2UL ) {
3624 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3626 xmm1 = xmm1 + a1 * B.load(k,j );
3627 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3629 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3630 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Single element.
3634 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3635 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3637 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// Vectorized default addition assignment kernel for a column-major target
// (DenseMatrix<MT3,true>), enabled via UseVectorizedDefaultKernel.
// Same scheme as the row-major variant but with tiles that are taller than
// wide: intrinsic accumulators are updated over k in steps of IT::size,
// reduced with sum() and the scaled result is added to C.
// The loads of a1..a4/b1/b2, the index declarations and the closing braces
// are not part of this excerpt.
3657 template<
typename MT3
3661 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3662 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3664 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
3666 const size_t M( A.rows() );
3667 const size_t N( B.columns() );
3668 const size_t K( A.columns() );
// Groups of four rows.
3672 for( ; (i+4UL) <= M; i+=4UL ) {
// Tile of 4 rows x 2 columns (eight accumulators).
3674 for( ; (j+2UL) <= N; j+=2UL ) {
3675 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3676 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3683 xmm1 = xmm1 + a1 * b1;
3684 xmm2 = xmm2 + a1 * b2;
3685 xmm3 = xmm3 + a2 * b1;
3686 xmm4 = xmm4 + a2 * b2;
3687 xmm5 = xmm5 + a3 * b1;
3688 xmm6 = xmm6 + a3 * b2;
3689 xmm7 = xmm7 + a4 * b1;
3690 xmm8 = xmm8 + a4 * b2;
// Reduce each accumulator and add the scaled element to C.
3692 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3693 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3694 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3695 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
3696 (~C)(i+2UL,j ) +=
sum( xmm5 ) * scalar;
3697 (~C)(i+2UL,j+1UL) +=
sum( xmm6 ) * scalar;
3698 (~C)(i+3UL,j ) +=
sum( xmm7 ) * scalar;
3699 (~C)(i+3UL,j+1UL) +=
sum( xmm8 ) * scalar;
// Tile of 4 rows x 1 column.
3703 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3705 xmm1 = xmm1 + A.load(i ,k) * b1;
3706 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3707 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3708 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3710 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3711 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
3712 (~C)(i+2UL,j) +=
sum( xmm3 ) * scalar;
3713 (~C)(i+3UL,j) +=
sum( xmm4 ) * scalar;
// Pairs of rows.
3716 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 2 columns.
3718 for( ; (j+2UL) <= N; j+=2UL ) {
3720 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3725 xmm1 = xmm1 + a1 * b1;
3726 xmm2 = xmm2 + a1 * b2;
3727 xmm3 = xmm3 + a2 * b1;
3728 xmm4 = xmm4 + a2 * b2;
3730 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3731 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3732 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3733 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
3737 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3739 xmm1 = xmm1 + A.load(i ,k) * b1;
3740 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3742 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3743 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 2 columns.
3748 for( ; (j+2UL) <= N; j+=2UL ) {
3750 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3752 xmm1 = xmm1 + a1 * B.load(k,j );
3753 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3755 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3756 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Single element.
3760 for(
size_t k=0UL; k<K; k+=
IT::size ) {
3761 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3763 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// BLAS kernel selection for addition assignment, fallback overload: enabled
// when UseDefaultKernel applies and forwards to the default kernel.
3783 template<
typename MT3
3787 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3788 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3790 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS kernel selection for addition assignment, single precision overload:
// enabled via UseSinglePrecisionKernel. Calls sgemm with alpha = scalar and
// beta = 1, so the scaled product is accumulated onto the existing C.
3809 template<
typename MT3
3813 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3814 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3816 sgemm( C, A, B, scalar, 1.0F );
// BLAS kernel selection for addition assignment, double precision overload:
// enabled via UseDoublePrecisionKernel. Calls dgemm with alpha = scalar and
// beta = 1, so the scaled product is accumulated onto the existing C.
3836 template<
typename MT3
3840 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3841 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3843 dgemm( C, A, B, scalar, 1.0 );
// BLAS kernel selection for addition assignment, single precision complex
// overload: enabled via UseSinglePrecisionComplexKernel. The scalar becomes
// the real part of alpha; beta is complex one, so C is accumulated onto.
3863 template<
typename MT3
3867 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3868 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3870 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS kernel selection for addition assignment, double precision complex
// overload: enabled via UseDoublePrecisionComplexKernel. The scalar becomes
// the real part of alpha; beta is complex one, so C is accumulated onto.
3890 template<
typename MT3
3894 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3895 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3897 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the scaled matrix multiplication to a dense
// matrix. The body of the early-exit branch and the definitions of A and B
// are not part of this excerpt.
3918 template<
typename MT
3920 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
3927 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3928 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
3930 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection, passing the scalar factor.
3944 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the subtraction assignment: forwards either to the
// default kernel or to the BLAS kernel selection. The condition deciding
// between the two calls is not part of this excerpt.
3959 template<
typename MT3
3963 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3966 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3968 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
// Non-vectorized default subtraction assignment kernel; only participates in
// overload resolution when UseVectorizedDefaultKernel does not apply.
// The function body is not part of this excerpt.
3986 template<
typename MT3
3990 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3991 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>), enabled via UseVectorizedDefaultKernel.
// C is processed in register tiles; for every tile, intrinsic accumulators
// are updated over the inner index k in steps of IT::size, reduced with sum()
// and the scaled result is subtracted from C.
// The loads of a1/a2/b1..b4, the index declarations and the closing braces
// are not part of this excerpt.
4012 template<
typename MT3
4016 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4017 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
4019 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
4021 const size_t M( A.rows() );
4022 const size_t N( B.columns() );
4023 const size_t K( A.columns() );
// Pairs of rows.
4027 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 4 columns (eight accumulators).
4029 for( ; (j+4UL) <= N; j+=4UL ) {
4030 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4031 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4038 xmm1 = xmm1 + a1 * b1;
4039 xmm2 = xmm2 + a1 * b2;
4040 xmm3 = xmm3 + a1 * b3;
4041 xmm4 = xmm4 + a1 * b4;
4042 xmm5 = xmm5 + a2 * b1;
4043 xmm6 = xmm6 + a2 * b2;
4044 xmm7 = xmm7 + a2 * b3;
4045 xmm8 = xmm8 + a2 * b4;
// Reduce each accumulator and subtract the scaled element from C.
4047 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
4048 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
4049 (~C)(i ,j+2UL) -=
sum( xmm3 ) * scalar;
4050 (~C)(i ,j+3UL) -=
sum( xmm4 ) * scalar;
4051 (~C)(i+1UL,j ) -=
sum( xmm5 ) * scalar;
4052 (~C)(i+1UL,j+1UL) -=
sum( xmm6 ) * scalar;
4053 (~C)(i+1UL,j+2UL) -=
sum( xmm7 ) * scalar;
4054 (~C)(i+1UL,j+3UL) -=
sum( xmm8 ) * scalar;
// Tile of 2 rows x 2 columns.
4056 for( ; (j+2UL) <= N; j+=2UL ) {
4058 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4063 xmm1 = xmm1 + a1 * b1;
4064 xmm2 = xmm2 + a1 * b2;
4065 xmm3 = xmm3 + a2 * b1;
4066 xmm4 = xmm4 + a2 * b2;
4068 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
4069 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
4070 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
4071 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
4075 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4077 xmm1 = xmm1 + A.load(i ,k) * b1;
4078 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4080 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
4081 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 4 columns.
4086 for( ; (j+4UL) <= N; j+=4UL ) {
4088 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4090 xmm1 = xmm1 + a1 * B.load(k,j );
4091 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4092 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
4093 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
4095 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
4096 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
4097 (~C)(i,j+2UL) -=
sum( xmm3 ) * scalar;
4098 (~C)(i,j+3UL) -=
sum( xmm4 ) * scalar;
// 1 row x 2 columns.
4100 for( ; (j+2UL) <= N; j+=2UL ) {
4102 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4104 xmm1 = xmm1 + a1 * B.load(k,j );
4105 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4107 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
4108 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Single element.
4112 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4113 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4115 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>), enabled via UseVectorizedDefaultKernel.
// Same scheme as the row-major variant but with tiles that are taller than
// wide: intrinsic accumulators are updated over k in steps of IT::size,
// reduced with sum() and the scaled result is subtracted from C.
// The loads of a1..a4/b1/b2, the index declarations and the closing braces
// are not part of this excerpt.
4135 template<
typename MT3
4139 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4140 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
4142 typedef IntrinsicTrait<ElementType> IT;
// M x N is the shape of the product, K the inner dimension.
4144 const size_t M( A.rows() );
4145 const size_t N( B.columns() );
4146 const size_t K( A.columns() );
// Groups of four rows.
4150 for( ; (i+4UL) <= M; i+=4UL ) {
// Tile of 4 rows x 2 columns (eight accumulators).
4152 for( ; (j+2UL) <= N; j+=2UL ) {
4153 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4154 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4161 xmm1 = xmm1 + a1 * b1;
4162 xmm2 = xmm2 + a1 * b2;
4163 xmm3 = xmm3 + a2 * b1;
4164 xmm4 = xmm4 + a2 * b2;
4165 xmm5 = xmm5 + a3 * b1;
4166 xmm6 = xmm6 + a3 * b2;
4167 xmm7 = xmm7 + a4 * b1;
4168 xmm8 = xmm8 + a4 * b2;
// Reduce each accumulator and subtract the scaled element from C.
4170 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
4171 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
4172 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
4173 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
4174 (~C)(i+2UL,j ) -=
sum( xmm5 ) * scalar;
4175 (~C)(i+2UL,j+1UL) -=
sum( xmm6 ) * scalar;
4176 (~C)(i+3UL,j ) -=
sum( xmm7 ) * scalar;
4177 (~C)(i+3UL,j+1UL) -=
sum( xmm8 ) * scalar;
// Tile of 4 rows x 1 column.
4181 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4183 xmm1 = xmm1 + A.load(i ,k) * b1;
4184 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4185 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
4186 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
4188 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
4189 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
4190 (~C)(i+2UL,j) -=
sum( xmm3 ) * scalar;
4191 (~C)(i+3UL,j) -=
sum( xmm4 ) * scalar;
// Pairs of rows.
4194 for( ; (i+2UL) <= M; i+=2UL ) {
// Tile of 2 rows x 2 columns.
4196 for( ; (j+2UL) <= N; j+=2UL ) {
4198 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4203 xmm1 = xmm1 + a1 * b1;
4204 xmm2 = xmm2 + a1 * b2;
4205 xmm3 = xmm3 + a2 * b1;
4206 xmm4 = xmm4 + a2 * b2;
4208 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
4209 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
4210 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
4211 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// Tile of 2 rows x 1 column.
4215 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4217 xmm1 = xmm1 + A.load(i ,k) * b1;
4218 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4220 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
4221 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single-row section (its enclosing condition is not shown): 1 row x 2 columns.
4226 for( ; (j+2UL) <= N; j+=2UL ) {
4228 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4230 xmm1 = xmm1 + a1 * B.load(k,j );
4231 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4233 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
4234 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Single element.
4238 for(
size_t k=0UL; k<K; k+=
IT::size ) {
4239 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4241 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// BLAS kernel selection for subtraction assignment, fallback overload:
// enabled when UseDefaultKernel applies and forwards to the default kernel.
4261 template<
typename MT3
4265 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4266 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4268 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS kernel selection for subtraction assignment, single precision
// overload: enabled via UseSinglePrecisionKernel. The subtraction is expressed
// as an accumulation with negated alpha (alpha = -scalar, beta = 1).
4287 template<
typename MT3
4291 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4292 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4294 sgemm( C, A, B, -scalar, 1.0F );
// BLAS kernel selection for subtraction assignment, double precision
// overload: enabled via UseDoublePrecisionKernel. The subtraction is expressed
// as an accumulation with negated alpha (alpha = -scalar, beta = 1).
4314 template<
typename MT3
4318 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4319 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4321 dgemm( C, A, B, -scalar, 1.0 );
// BLAS kernel selection for subtraction assignment, single precision complex
// overload: enabled via UseSinglePrecisionComplexKernel. The negated scalar
// becomes the real part of alpha; beta is complex one.
4341 template<
typename MT3
4345 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4346 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4348 cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS kernel selection for subtraction assignment, double precision complex
// overload: enabled via UseDoublePrecisionComplexKernel. The negated scalar
// becomes the real part of alpha; beta is complex one.
4368 template<
typename MT3
4372 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4373 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4375 zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// SMP assignment of the scaled matrix multiplication to a dense matrix.
// Only participates in overload resolution when IsEvaluationRequired holds
// for the target and both operand types. The branch bodies and the regular
// path are not part of this excerpt.
4407 template<
typename MT
4409 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4410 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
4417 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4418 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: the target has no rows or no columns.
4420 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension of the product is zero.
4423 else if( left.columns() == 0UL ) {
// SMP assignment of the scaled matrix multiplication to a sparse matrix.
// Only participates in overload resolution when IsEvaluationRequired holds.
// The expression is evaluated into a temporary; the statements that transfer
// the temporary to lhs are not part of this excerpt.
4457 template<
typename MT
4459 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4460 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type chosen by the storage order SO of the target between
// OppositeType and ResultType.
4464 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// The temporary is built directly from rhs (no serial() wrapper here).
4476 const TmpType tmp( rhs );
// SMP addition assignment of the scaled matrix multiplication to a dense
// matrix. Only participates in overload resolution when IsEvaluationRequired
// holds. The branch body and the regular path are not part of this excerpt.
4496 template<
typename MT
4498 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4499 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
4506 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4507 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
4509 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment of the scaled matrix multiplication to a dense
// matrix. Only participates in overload resolution when IsEvaluationRequired
// holds. The branch body and the regular path are not part of this excerpt.
4546 template<
typename MT
4548 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4549 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch both operands of the wrapped multiplication expression.
4556 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4557 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
4559 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Free function template returning a DMatTDMatMultExpr<T1,T2> expression
// object. NOTE(review): the function name and parameter list are not part of
// this excerpt - presumably the global multiplication operator; confirm.
4641 template<
typename T1
4643 inline const DMatTDMatMultExpr<T1,T2>
// Raised on a size mismatch of the operands (the check itself is not shown).
4649 throw std::invalid_argument(
"Matrix sizes do not match" );
4666 template<
typename MT1,
typename MT2 >
// Trait specialization deriving from Columns<MT2>, i.e. the column count is
// taken from the right-hand side operand type. The specialized struct header
// is not part of this excerpt.
4684 template<
typename MT1,
typename MT2 >
4686 :
public Columns<MT2>
// Trait specialization that is true only if both operand types are lower
// matrices. The specialized struct header is not part of this excerpt.
4702 template<
typename MT1,
typename MT2 >
4704 :
public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
// Trait specialization that is true only if both operand types are upper
// matrices. The specialized struct header is not part of this excerpt.
4720 template<
typename MT1,
typename MT2 >
4722 :
public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
// Expression trait for (MT1 * MT2) * VT with a dense column vector VT.
// The condition checks: MT1 dense row-major, MT2 dense column-major, VT dense
// column vector. The selected alternative nests the traits as MT1 * (MT2 * VT);
// INVALID_TYPE is the other alternative (presumably used when the condition
// fails - confirm against SelectType). The struct header is not shown.
4738 template<
typename MT1,
typename MT2,
typename VT >
4743 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4744 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4745 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4746 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4747 , INVALID_TYPE >::Type Type;
// Expression trait for (MT1 * MT2) * VT with a sparse column vector VT.
// The condition checks: MT1 dense row-major, MT2 dense column-major, VT sparse
// column vector. The selected alternative nests the traits as MT1 * (MT2 * VT);
// INVALID_TYPE is the other alternative (presumably used when the condition
// fails - confirm against SelectType). The struct header is not shown.
4756 template<
typename MT1,
typename MT2,
typename VT >
4761 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4762 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4763 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4764 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4765 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a dense row vector VT.
// The condition checks: VT dense row vector, MT1 dense row-major, MT2 dense
// column-major. The selected alternative nests the traits as (VT * MT1) * MT2;
// INVALID_TYPE is the other alternative (presumably used when the condition
// fails - confirm against SelectType). The struct header is not shown.
4774 template<
typename VT,
typename MT1,
typename MT2 >
4779 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4780 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4781 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4782 ,
typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4783 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a sparse row vector VT.
// The condition checks: VT sparse row vector, MT1 dense row-major, MT2 dense
// column-major. The selected alternative nests the traits as (VT * MT1) * MT2;
// INVALID_TYPE is the other alternative (presumably used when the condition
// fails - confirm against SelectType). The struct header is not shown.
4792 template<
typename VT,
typename MT1,
typename MT2 >
4797 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4798 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4799 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4800 ,
typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4801 , INVALID_TYPE >::Type Type;
// Submatrix trait of the multiplication expression: the result type is the
// product of the submatrix types of both (const) operands, each using the
// same alignment flag AF. The struct header is not part of this excerpt.
4810 template<
typename MT1,
typename MT2,
bool AF >
4815 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4816 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
// Row trait of the multiplication expression: the result type is the product
// of a row of the (const) left operand with the full right operand.
// The struct header is not part of this excerpt.
4825 template<
typename MT1,
typename MT2 >
4830 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Column trait of the multiplication expression: the result type is the
// product of the full left operand with a column of the (const) right operand.
// The struct header is not part of this excerpt.
4839 template<
typename MT1,
typename MT2 >
4844 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:419
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:262
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:270
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:264
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:315
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:409
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:260
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:130
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:132
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:300
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:355
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:267
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
Header file for the IsDouble type trait.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
const size_t SMP_DMATTDMATMULT_THRESHOLD
SMP row-major dense matrix/column-major dense matrix multiplication threshold. This threshold specifie...
Definition: Thresholds.h:857
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:399
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:129
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:133
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:265
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:345
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:104
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:261
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:142
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:387
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:131
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:365
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:266
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:428
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:122
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:128
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:429
Header file for the IsUpper type trait.
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:375
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:263
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:276
Constraint on the data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:279
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:273
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849