35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
44 #include <boost/cast.hpp>
106 template<
typename MT
// Compile-time switch used with EnableIf to select the SMP assignment overloads:
// 'value' is set when the matrix operand or the vector operand has to be evaluated
// (evaluateMatrix / evaluateVector are declared on lines not shown here).
// The template parameter T1 does not appear in the visible expression.
139 template<
typename T1 >
140 struct UseSMPAssign {
141 enum { value = ( evaluateMatrix || evaluateVector ) };
// Trait that enables the sgemv-based kernel overloads for the type triple (T1,T2,T3).
// Only part of the condition is visible: all three types must be vectorizable.
// NOTE(review): the start and the remaining terms of the enum are on lines not shown.
152 template<
typename T1,
typename T2,
typename T3 >
153 struct UseSinglePrecisionKernel {
155 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Trait that enables the dgemv-based kernel overloads for the type triple (T1,T2,T3).
// Only part of the condition is visible: all three types must be vectorizable.
// NOTE(review): the start and the remaining terms of the enum are on lines not shown.
169 template<
typename T1,
typename T2,
typename T3 >
170 struct UseDoublePrecisionKernel {
172 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Trait that enables the cgemv-based kernel overloads for the type triple (T1,T2,T3).
// 'Type' names the element type this kernel targets (complex<float>).
// NOTE(review): only the vectorizable part of the condition is visible here.
186 template<
typename T1,
typename T2,
typename T3 >
187 struct UseSinglePrecisionComplexKernel {
188 typedef complex<float> Type;
190 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Trait that enables the zgemv-based kernel overloads for the type triple (T1,T2,T3).
// 'Type' names the element type this kernel targets (complex<double>).
// NOTE(review): only the vectorizable part of the condition is visible here.
204 template<
typename T1,
typename T2,
typename T3 >
205 struct UseDoublePrecisionComplexKernel {
206 typedef complex<double> Type;
208 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Trait that enables the fallback (non-BLAS) kernel overloads for (T1,T2,T3).
// 'value' is set when BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel
// traits (single, double, complex single, complex double) applies to the type triple.
221 template<
typename T1,
typename T2,
typename T3 >
222 struct UseDefaultKernel {
223 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
224 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
225 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
226 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Trait that selects between the vectorized and the scalar default kernel overloads
// (used with EnableIf / DisableIf on selectDefault*AssignKernel).
// NOTE(review): only the first term is visible (all three types vectorizable); the
// remaining terms of the condition are on lines not shown.
237 template<
typename T1,
typename T2,
typename T3 >
238 struct UseVectorizedDefaultKernel {
239 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
273 enum { vectorizable = MT::vectorizable && VT::vectorizable &&
// Compilation flag for SMP assignments: set only when neither operand needs to be
// evaluated and both operand types report smpAssignable themselves.
279 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
280 !evaluateVector && VT::smpAssignable };
309 if(
mat_.columns() != 0UL ) {
311 for(
size_t j=1UL; j<
end_; j+=2UL ) {
314 if( end_ <
mat_.columns() ) {
362 template<
typename T >
364 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
374 template<
typename T >
376 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
386 return mat_.isAligned() &&
vec_.isAligned();
// Thin wrapper around cblas_sgemv for a row-major, non-transposed matrix A.
//   y     : target vector, accessed through y.data()
//   A     : matrix providing rows(), columns(), spacing() and data()
//   x     : source vector, accessed through x.data()
//   alpha : forwarded to the BLAS call as the scaling factor of A*x
//   beta  : forwarded to the BLAS call as the scaling factor of y
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
427 template<
typename VT1
430 static inline void sgemv( VT1& y,
const MT1& A,
const VT2& x,
float alpha,
float beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
432 using boost::numeric_cast;
438 const int M ( numeric_cast<int>( A.rows() ) );
439 const int N ( numeric_cast<int>( A.columns() ) );
440 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
442 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, alpha,
443 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_dgemv for a row-major, non-transposed matrix A.
//   y     : target vector, accessed through y.data()
//   A     : matrix providing rows(), columns(), spacing() and data()
//   x     : source vector, accessed through x.data()
//   alpha : forwarded to the BLAS call as the scaling factor of A*x
//   beta  : forwarded to the BLAS call as the scaling factor of y
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
466 template<
typename VT1
469 static inline void dgemv( VT1& y,
const MT1& A,
const VT2& x,
double alpha,
double beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
471 using boost::numeric_cast;
477 const int M ( numeric_cast<int>( A.rows() ) );
478 const int N ( numeric_cast<int>( A.columns() ) );
479 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
481 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, alpha,
482 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_cgemv for a row-major, non-transposed matrix A.
//   y, A, x     : operands accessed through data(); A also provides rows(),
//                 columns() and spacing()
//   alpha, beta : complex<float> factors, taken by value and passed to BLAS by address
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
505 template<
typename VT1
508 static inline void cgemv( VT1& y,
const MT1& A,
const VT2& x,
509 complex<float> alpha, complex<float> beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
511 using boost::numeric_cast;
520 const int M ( numeric_cast<int>( A.rows() ) );
521 const int N ( numeric_cast<int>( A.columns() ) );
522 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
524 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
525 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Thin wrapper around cblas_zgemv for a row-major, non-transposed matrix A.
//   y, A, x     : operands accessed through data(); A also provides rows(),
//                 columns() and spacing()
//   alpha, beta : complex<double> factors, taken by value and passed to BLAS by address
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
548 template<
typename VT1
551 static inline void zgemv( VT1& y,
const MT1& A,
const VT2& x,
552 complex<double> alpha, complex<double> beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
554 using boost::numeric_cast;
563 const int M ( numeric_cast<int>( A.rows() ) );
564 const int N ( numeric_cast<int>( A.columns() ) );
565 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
567 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
568 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
587 template<
typename VT1 >
594 if( rhs.mat_.rows() == 0UL ) {
597 else if( rhs.mat_.columns() == 0UL ) {
610 DMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
// Kernel dispatcher for the assignment y = A * x: chooses between the default kernel
// and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
626 template<
typename VT1
629 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// First term: MT is a computation that is not evaluated up front.
631 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
633 DMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
635 DMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
// Default assignment kernel overload that is enabled when UseVectorizedDefaultKernel
// does NOT hold for (VT1,MT1,VT2) (DisableIf).
// NOTE(review): the body of this overload is not shown here.
654 template<
typename VT1
657 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
658 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// Vectorized default kernel for the assignment y = A * x, enabled when
// UseVectorizedDefaultKernel holds for (VT1,MT1,VT2).
// Rows are handled in blocks of 8, 4, 3 and 2, followed by a single-row step. Each row
// accumulates products of A.load(row,j) with a chunk of x in an IntrinsicType
// accumulator, and the accumulator is reduced with sum() into the matching element of y.
// NOTE(review): the declaration of i, the definition of x1, the accumulator
// declarations of the smaller blocks and the stores to y[i] are on lines not shown.
// NOTE(review): the inner loops step j by IT::size while j<N; this presumably relies
// on padded storage of A and x - confirm.
679 template<
typename VT1
682 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
683 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
685 typedef IntrinsicTrait<ElementType> IT;
687 const size_t M( A.rows() );
688 const size_t N( A.columns() );
// Blocks of eight rows.
692 for( ; (i+8UL) <= M; i+=8UL ) {
693 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
694 for(
size_t j=0UL; j<N; j+=
IT::size ) {
696 xmm1 = xmm1 + A.load(i ,j) * x1;
697 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
698 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
699 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
700 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
701 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
702 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
703 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Horizontal reduction of each accumulator into its result element.
706 y[i+1UL] =
sum( xmm2 );
707 y[i+2UL] =
sum( xmm3 );
708 y[i+3UL] =
sum( xmm4 );
709 y[i+4UL] =
sum( xmm5 );
710 y[i+5UL] =
sum( xmm6 );
711 y[i+6UL] =
sum( xmm7 );
712 y[i+7UL] =
sum( xmm8 );
// Blocks of four rows.
714 for( ; (i+4UL) <= M; i+=4UL ) {
716 for(
size_t j=0UL; j<N; j+=
IT::size ) {
718 xmm1 = xmm1 + A.load(i ,j) * x1;
719 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
720 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
721 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
724 y[i+1UL] =
sum( xmm2 );
725 y[i+2UL] =
sum( xmm3 );
726 y[i+3UL] =
sum( xmm4 );
// Blocks of three rows.
728 for( ; (i+3UL) <= M; i+=3UL ) {
730 for(
size_t j=0UL; j<N; j+=
IT::size ) {
732 xmm1 = xmm1 + A.load(i ,j) * x1;
733 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
734 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
737 y[i+1UL] =
sum( xmm2 );
738 y[i+2UL] =
sum( xmm3 );
// Blocks of two rows.
740 for( ; (i+2UL) <= M; i+=2UL ) {
742 for(
size_t j=0UL; j<N; j+=
IT::size ) {
744 xmm1 = xmm1 + A.load(i ,j) * x1;
745 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
748 y[i+1UL] =
sum( xmm2 );
// Single-row step: loads the chunk of x directly instead of using x1.
752 for(
size_t j=0UL; j<N; j+=
IT::size ) {
753 xmm1 = xmm1 + A.load(i,j) * x.load(j);
// BLAS-dispatch overload for the case where no BLAS kernel applies (UseDefaultKernel):
// simply forwards to the default assignment kernel.
775 template<
typename VT1
778 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
779 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
781 selectDefaultAssignKernel( y, A, x );
// BLAS-dispatch overload for single precision operands: y = A * x via sgemv
// (alpha = 1, beta = 0, i.e. the previous content of y does not contribute).
801 template<
typename VT1
804 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
805 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
807 sgemv( y, A, x, 1.0F, 0.0F );
// BLAS-dispatch overload for double precision operands: y = A * x via dgemv
// (alpha = 1, beta = 0, i.e. the previous content of y does not contribute).
828 template<
typename VT1
831 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
832 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
834 dgemv( y, A, x, 1.0, 0.0 );
// BLAS-dispatch overload for complex<float> operands: y = A * x via cgemv
// (alpha = (1,0), beta = (0,0)).
855 template<
typename VT1
858 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
859 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
861 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS-dispatch overload for complex<double> operands: y = A * x via zgemv
// (alpha = (1,0), beta = (0,0)).
882 template<
typename VT1
885 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
886 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
888 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
907 template<
typename VT1 >
937 template<
typename VT1 >
944 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
956 DMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
// Kernel dispatcher for the addition assignment y += A * x: chooses between the
// default kernel and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
972 template<
typename VT1
975 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// First term: MT is a computation that is not evaluated up front.
977 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
979 DMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
981 DMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
// Non-vectorized default kernel for y += A * x (enabled when UseVectorizedDefaultKernel
// does not hold): delegates to the target vector's addAssign() with the product A * x.
1000 template<
typename VT1
1003 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1004 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1006 y.addAssign( A * x );
// Vectorized default kernel for the addition assignment y += A * x, enabled when
// UseVectorizedDefaultKernel holds for (VT1,MT1,VT2).
// Rows are handled in blocks of 8, 4, 3 and 2, followed by a single-row step. Each row
// accumulates products of A.load(row,j) with a chunk of x in an IntrinsicType
// accumulator; the reduced sum is then added to the matching element of y.
// NOTE(review): the declaration of i, the definition of x1 and the accumulator
// declarations of the smaller blocks are on lines not shown.
// NOTE(review): the inner loops step j by IT::size while j<N; this presumably relies
// on padded storage of A and x - confirm.
1025 template<
typename VT1
1028 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1029 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1031 typedef IntrinsicTrait<ElementType> IT;
1033 const size_t M( A.rows() );
1034 const size_t N( A.columns() );
// Blocks of eight rows.
1038 for( ; (i+8UL) <= M; i+=8UL ) {
1039 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1040 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1042 xmm1 = xmm1 + A.load(i ,j) * x1;
1043 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1044 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1045 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1046 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1047 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1048 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1049 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Horizontal reduction of each accumulator, added onto the existing result element.
1051 y[i ] +=
sum( xmm1 );
1052 y[i+1UL] +=
sum( xmm2 );
1053 y[i+2UL] +=
sum( xmm3 );
1054 y[i+3UL] +=
sum( xmm4 );
1055 y[i+4UL] +=
sum( xmm5 );
1056 y[i+5UL] +=
sum( xmm6 );
1057 y[i+6UL] +=
sum( xmm7 );
1058 y[i+7UL] +=
sum( xmm8 );
// Blocks of four rows.
1060 for( ; (i+4UL) <= M; i+=4UL ) {
1062 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1064 xmm1 = xmm1 + A.load(i ,j) * x1;
1065 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1066 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1067 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1069 y[i ] +=
sum( xmm1 );
1070 y[i+1UL] +=
sum( xmm2 );
1071 y[i+2UL] +=
sum( xmm3 );
1072 y[i+3UL] +=
sum( xmm4 );
// Blocks of three rows.
1074 for( ; (i+3UL) <= M; i+=3UL ) {
1076 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1078 xmm1 = xmm1 + A.load(i ,j) * x1;
1079 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1080 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1082 y[i ] +=
sum( xmm1 );
1083 y[i+1UL] +=
sum( xmm2 );
1084 y[i+2UL] +=
sum( xmm3 );
// Blocks of two rows.
1086 for( ; (i+2UL) <= M; i+=2UL ) {
1088 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1090 xmm1 = xmm1 + A.load(i ,j) * x1;
1091 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1093 y[i ] +=
sum( xmm1 );
1094 y[i+1UL] +=
sum( xmm2 );
// Single-row step: loads the chunk of x directly instead of using x1.
1098 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1099 xmm1 = xmm1 + A.load(i,j) * x.load(j);
1101 y[i] +=
sum( xmm1 );
// BLAS-dispatch overload for the case where no BLAS kernel applies (UseDefaultKernel):
// simply forwards to the default addition assignment kernel.
1121 template<
typename VT1
1124 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1125 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1127 selectDefaultAddAssignKernel( y, A, x );
// BLAS-dispatch overload for single precision operands: y += A * x via sgemv
// (alpha = 1, beta = 1, i.e. the previous content of y is kept and added to).
1147 template<
typename VT1
1150 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1151 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1153 sgemv( y, A, x, 1.0F, 1.0F );
// BLAS-dispatch overload for double precision operands: y += A * x via dgemv
// (alpha = 1, beta = 1, i.e. the previous content of y is kept and added to).
1174 template<
typename VT1
1177 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1178 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1180 dgemv( y, A, x, 1.0, 1.0 );
// BLAS-dispatch overload for complex<float> operands: y += A * x via cgemv
// (alpha = (1,0), beta = (1,0)).
1201 template<
typename VT1
1204 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1205 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1207 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-dispatch overload for complex<double> operands: y += A * x via zgemv
// (alpha = (1,0), beta = (1,0)).
1228 template<
typename VT1
1231 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1232 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1234 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1257 template<
typename VT1 >
1264 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1276 DMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
// Kernel dispatcher for the subtraction assignment y -= A * x: chooses between the
// default kernel and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
1292 template<
typename VT1
1295 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// First term: MT is a computation that is not evaluated up front.
1297 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1299 DMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1301 DMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
// Non-vectorized default kernel for y -= A * x (enabled when UseVectorizedDefaultKernel
// does not hold): delegates to the target vector's subAssign() with the product A * x.
1320 template<
typename VT1
1323 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1324 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1326 y.subAssign( A * x );
// Vectorized default kernel for the subtraction assignment y -= A * x, enabled when
// UseVectorizedDefaultKernel holds for (VT1,MT1,VT2).
// Rows are handled in blocks of 8, 4, 3 and 2, followed by a single-row step. Each row
// accumulates products of A.load(row,j) with a chunk of x in an IntrinsicType
// accumulator; the reduced sum is then subtracted from the matching element of y.
// NOTE(review): the declaration of i, the definition of x1 and the accumulator
// declarations of the smaller blocks are on lines not shown.
// NOTE(review): the inner loops step j by IT::size while j<N; this presumably relies
// on padded storage of A and x - confirm.
1345 template<
typename VT1
1348 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1349 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1351 typedef IntrinsicTrait<ElementType> IT;
1353 const size_t M( A.rows() );
1354 const size_t N( A.columns() );
// Blocks of eight rows.
1358 for( ; (i+8UL) <= M; i+=8UL ) {
1359 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1360 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1362 xmm1 = xmm1 + A.load(i ,j) * x1;
1363 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1364 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1365 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1366 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1367 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1368 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1369 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Horizontal reduction of each accumulator, subtracted from the existing result element.
1371 y[i ] -=
sum( xmm1 );
1372 y[i+1UL] -=
sum( xmm2 );
1373 y[i+2UL] -=
sum( xmm3 );
1374 y[i+3UL] -=
sum( xmm4 );
1375 y[i+4UL] -=
sum( xmm5 );
1376 y[i+5UL] -=
sum( xmm6 );
1377 y[i+6UL] -=
sum( xmm7 );
1378 y[i+7UL] -=
sum( xmm8 );
// Blocks of four rows.
1380 for( ; (i+4UL) <= M; i+=4UL ) {
1382 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1384 xmm1 = xmm1 + A.load(i ,j) * x1;
1385 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1386 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1387 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1389 y[i ] -=
sum( xmm1 );
1390 y[i+1UL] -=
sum( xmm2 );
1391 y[i+2UL] -=
sum( xmm3 );
1392 y[i+3UL] -=
sum( xmm4 );
// Blocks of three rows.
1394 for( ; (i+3UL) <= M; i+=3UL ) {
1396 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1398 xmm1 = xmm1 + A.load(i ,j) * x1;
1399 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1400 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1402 y[i ] -=
sum( xmm1 );
1403 y[i+1UL] -=
sum( xmm2 );
1404 y[i+2UL] -=
sum( xmm3 );
// Blocks of two rows.
1406 for( ; (i+2UL) <= M; i+=2UL ) {
1408 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1410 xmm1 = xmm1 + A.load(i ,j) * x1;
1411 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1413 y[i ] -=
sum( xmm1 );
1414 y[i+1UL] -=
sum( xmm2 );
// Single-row step: loads the chunk of x directly instead of using x1.
1418 for(
size_t j=0UL; j<N; j+=
IT::size ) {
1419 xmm1 = xmm1 + A.load(i,j) * x.load(j);
1421 y[i] -=
sum( xmm1 );
// BLAS-dispatch overload for the case where no BLAS kernel applies (UseDefaultKernel):
// simply forwards to the default subtraction assignment kernel.
1441 template<
typename VT1
1444 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1445 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1447 selectDefaultSubAssignKernel( y, A, x );
// BLAS-dispatch overload for single precision operands: y -= A * x via sgemv
// (alpha = -1 negates the product, beta = 1 keeps the previous content of y).
1467 template<
typename VT1
1470 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1471 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1473 sgemv( y, A, x, -1.0F, 1.0F );
// BLAS-dispatch overload for double precision operands: y -= A * x via dgemv
// (alpha = -1 negates the product, beta = 1 keeps the previous content of y).
1494 template<
typename VT1
1497 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1498 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1500 dgemv( y, A, x, -1.0, 1.0 );
// BLAS-dispatch overload for complex<float> operands: y -= A * x via cgemv
// (alpha = (-1,0), beta = (1,0)).
1521 template<
typename VT1
1524 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1525 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1527 cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-dispatch overload for complex<double> operands: y -= A * x via zgemv
// (alpha = (-1,0), beta = (1,0)).
1548 template<
typename VT1
1551 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1552 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1554 zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1577 template<
typename VT1 >
1613 template<
typename VT1 >
1614 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1621 if( rhs.mat_.rows() == 0UL ) {
1624 else if( rhs.mat_.columns() == 0UL ) {
1657 template<
typename VT1 >
1658 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1690 template<
typename VT1 >
1691 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1698 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1734 template<
typename VT1 >
1735 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1742 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1778 template<
typename VT1 >
1779 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1829 template<
typename MT
1833 :
public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1834 ,
private VecScalarMultExpr
1835 ,
private Computation
// Shorthand for the dense matrix/dense vector multiplication expression being scaled.
1839 typedef DMatDVecMultExpr<MT,VT> MVM;
// Compilation switch for the matrix operand: set when MT is a computation whose
// element type matches the vector's and is BLAS compatible, or when MT requires
// evaluation. (MET / VET are declared on lines not shown; presumably the element
// types of MT and VT - confirm.)
1851 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1852 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Compilation switch for the vector operand: set when VT is itself a computation or
// requires an intermediate evaluation. Both terms must query the vector type VT; the
// evaluation requirement of the matrix type MT is already covered by evaluateMatrix
// and must not force an evaluation of the vector operand.
1857 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// Compile-time switch for selecting the SMP assignment overloads: 'value' is set when
// the matrix operand or the vector operand has to be evaluated.
// The template parameter T1 does not appear in the visible expression.
1865 template<
typename T1 >
1866 struct UseSMPAssign {
1867 enum { value = ( evaluateMatrix || evaluateVector ) };
// Trait that enables the sgemv-based kernel overloads for (T1,T2,T3) and scalar type T4.
// Visible conditions: all three types are vectorizable, all three element types are
// float, and the scalar type T4 is not complex.
// NOTE(review): the line that opens the enum (and any leading term) is not shown.
1876 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1877 struct UseSinglePrecisionKernel {
1879 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1880 IsFloat<typename T1::ElementType>::value &&
1881 IsFloat<typename T2::ElementType>::value &&
1882 IsFloat<typename T3::ElementType>::value &&
1883 !IsComplex<T4>::value };
// Trait that enables the dgemv-based kernel overloads for (T1,T2,T3) and scalar type T4.
// Visible conditions: all three types are vectorizable, all three element types are
// double, and the scalar type T4 is not complex.
// NOTE(review): the line that opens the enum (and any leading term) is not shown.
1892 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1893 struct UseDoublePrecisionKernel {
1895 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1896 IsDouble<typename T1::ElementType>::value &&
1897 IsDouble<typename T2::ElementType>::value &&
1898 IsDouble<typename T3::ElementType>::value &&
1899 !IsComplex<T4>::value };
// Trait that enables the cgemv-based kernel overloads for (T1,T2,T3).
// Visible conditions: all three types are vectorizable and all three element types
// are exactly complex<float> ('Type'). The scalar type is not part of this trait.
// NOTE(review): the line that opens the enum (and any leading term) is not shown.
1908 template<
typename T1,
typename T2,
typename T3 >
1909 struct UseSinglePrecisionComplexKernel {
1910 typedef complex<float> Type;
1912 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1913 IsSame<typename T1::ElementType,Type>::value &&
1914 IsSame<typename T2::ElementType,Type>::value &&
1915 IsSame<typename T3::ElementType,Type>::value };
// Trait that enables the zgemv-based kernel overloads for (T1,T2,T3).
// Visible conditions: all three types are vectorizable and all three element types
// are exactly complex<double> ('Type'). The scalar type is not part of this trait.
// NOTE(review): the line that opens the enum (and any leading term) is not shown.
1924 template<
typename T1,
typename T2,
typename T3 >
1925 struct UseDoublePrecisionComplexKernel {
1926 typedef complex<double> Type;
1928 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1929 IsSame<typename T1::ElementType,Type>::value &&
1930 IsSame<typename T2::ElementType,Type>::value &&
1931 IsSame<typename T3::ElementType,Type>::value };
// Trait that enables the fallback (non-BLAS) kernel overloads for (T1,T2,T3) and
// scalar type T4. 'value' is set when BLAZE_BLAS_MODE is off, or when none of the four
// BLAS kernel traits applies. Only the real-valued traits take the scalar type T4.
1939 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1940 struct UseDefaultKernel {
1941 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1942 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1943 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1944 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Trait that selects between the vectorized and the scalar default kernel overloads.
// 'value' is set when all three types are vectorizable, their element types and the
// scalar type T4 are all the same type, and IntrinsicTrait reports both addition and
// multiplication for that element type.
1953 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1954 struct UseVectorizedDefaultKernel {
1955 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1956 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1957 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1958 IsSame<typename T1::ElementType,T4>::value &&
1959 IntrinsicTrait<typename T1::ElementType>::addition &&
1960 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression specialization.
1966 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// Result type: multiplication trait of RES and the scalar type (RES is declared on a
// line not shown).
1967 typedef typename MultTrait<RES,ST>::Type
ResultType;
// SIMD type associated with ElementType (ElementType is declared on a line not shown).
1970 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// Left-hand operand of the scaling: the matrix/vector multiplication expression.
1975 typedef const DMatDVecMultExpr<MT,VT>
LeftOperand;
// Type used for the matrix operand inside the kernels, chosen by evaluateMatrix
// (presumably MRT when the matrix is evaluated and MCT otherwise - confirm).
1981 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
LT;
// Type used for the vector operand inside the kernels, chosen by evaluateVector
// (presumably VRT when the vector is evaluated and VCT otherwise - confirm).
1984 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
RT;
// Compilation flag for vectorized evaluation: both operand types must be vectorizable,
// the matrix element type MET must equal both VET and the scalar type ST, and
// IntrinsicTrait must report addition and multiplication for MET.
1989 enum { vectorizable = MT::vectorizable && VT::vectorizable &&
1990 IsSame<MET,VET>::value &&
1991 IsSame<MET,ST>::value &&
1992 IntrinsicTrait<MET>::addition &&
1993 IntrinsicTrait<MET>::multiplication };
// Compilation flag for SMP assignments: set only when neither operand needs to be
// evaluated and both operand types report smpAssignable themselves.
1996 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
1997 !evaluateVector && VT::smpAssignable };
// Constructor taking the matrix/vector multiplication expression and the scalar it is
// multiplied with.
// NOTE(review): the initializer list and body are on lines not shown.
2006 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
2020 return vector_[index] * scalar_;
// Returns the size of the expression, which is the size of the wrapped
// matrix/vector multiplication expression.
2029 inline size_t size()
const {
2030 return vector_.size();
// Reports whether the expression can alias with the given address by forwarding the
// query to the wrapped matrix/vector multiplication expression.
2060 template<
typename T >
2061 inline bool canAlias(
const T* alias )
const {
2062 return vector_.canAlias( alias );
// Reports whether the expression is aliased with the given address by forwarding the
// query to the wrapped matrix/vector multiplication expression.
2072 template<
typename T >
2073 inline bool isAliased(
const T* alias )
const {
2074 return vector_.isAliased( alias );
2084 return vector_.isAligned();
2094 typename MVM::LeftOperand A( vector_.leftOperand() );
2096 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Thin wrapper around cblas_sgemv for a row-major, non-transposed matrix A.
//   y     : target vector, accessed through y.data()
//   A     : matrix providing rows(), columns(), spacing() and data()
//   x     : source vector, accessed through x.data()
//   alpha : forwarded to the BLAS call as the scaling factor of A*x
//   beta  : forwarded to the BLAS call as the scaling factor of y
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
2124 template<
typename VT1
2127 static inline void sgemv( VT1& y,
const MT1& A,
const VT2& x,
float alpha,
float beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
2129 using boost::numeric_cast;
2135 const int M ( numeric_cast<int>( A.rows() ) );
2136 const int N ( numeric_cast<int>( A.columns() ) );
2137 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
2139 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, alpha,
2140 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_dgemv for a row-major, non-transposed matrix A.
//   y     : target vector, accessed through y.data()
//   A     : matrix providing rows(), columns(), spacing() and data()
//   x     : source vector, accessed through x.data()
//   alpha : forwarded to the BLAS call as the scaling factor of A*x
//   beta  : forwarded to the BLAS call as the scaling factor of y
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
2161 template<
typename VT1
2164 static inline void dgemv( VT1& y,
const MT1& A,
const VT2& x,
double alpha,
double beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
2166 using boost::numeric_cast;
2172 const int M ( numeric_cast<int>( A.rows() ) );
2173 const int N ( numeric_cast<int>( A.columns() ) );
2174 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
2176 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, alpha,
2177 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_cgemv for a row-major, non-transposed matrix A.
//   y, A, x     : operands accessed through data(); A also provides rows(),
//                 columns() and spacing()
//   alpha, beta : complex<float> factors, taken by value and passed to BLAS by address
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
2198 template<
typename VT1
2201 static inline void cgemv( VT1& y,
const MT1& A,
const VT2& x,
2202 complex<float> alpha, complex<float> beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
2204 using boost::numeric_cast;
2213 const int M ( numeric_cast<int>( A.rows() ) );
2214 const int N ( numeric_cast<int>( A.columns() ) );
2215 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
2217 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2218 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Thin wrapper around cblas_zgemv for a row-major, non-transposed matrix A.
//   y, A, x     : operands accessed through data(); A also provides rows(),
//                 columns() and spacing()
//   alpha, beta : complex<double> factors, taken by value and passed to BLAS by address
// Per the CBLAS gemv contract this evaluates y = alpha*A*x + beta*y.
2239 template<
typename VT1
2242 static inline void zgemv( VT1& y,
const MT1& A,
const VT2& x,
2243 complex<double> alpha, complex<double> beta )
// numeric_cast is a range-checked conversion (Boost.NumericConversion).
2245 using boost::numeric_cast;
2254 const int M ( numeric_cast<int>( A.rows() ) );
2255 const int N ( numeric_cast<int>( A.columns() ) );
2256 const int lda( numeric_cast<int>( A.spacing() ) );
// spacing() is passed as the leading dimension; both vectors are passed with stride 1.
2258 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2259 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assignment of the scaled matrix/vector product to a dense vector.
// The operands of the wrapped multiplication are fetched first; a matrix with zero
// rows or zero columns is handled by dedicated branches, otherwise the work is passed
// to selectAssignKernel together with the scalar.
// NOTE(review): the bodies of the two special-case branches and the definitions of A
// and x (presumably the evaluated forms of 'left' and 'right') are on lines not shown.
2276 template<
typename VT1 >
2277 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2283 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2284 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2286 if( left.rows() == 0UL ) {
2289 else if( left.columns() == 0UL ) {
2302 DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatcher for the assignment y = A * x * scalar: chooses between the default
// kernel and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
2317 template<
typename VT1
2321 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// First term: MT is a computation that is not evaluated up front.
2323 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2325 DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2327 DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
// Non-vectorized default kernel for y = A * x * scalar (enabled when
// UseVectorizedDefaultKernel does not hold): delegates to the target vector's assign()
// with the scaled product expression.
2345 template<
typename VT1
2349 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2350 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2352 y.assign( A * x * scalar );
// Vectorized default kernel for the assignment y = A * x * scalar, enabled when
// UseVectorizedDefaultKernel holds for (VT1,MT1,VT2,ST2).
// Rows are handled in blocks of 8, 4, 3 and 2, followed by a single-row step. Each row
// accumulates products of A.load(row,j) with a chunk of x in an IntrinsicType
// accumulator; the reduced sum is scaled by 'scalar' and stored in y.
// NOTE(review): the declaration of i, the definition of x1 and the accumulator
// declarations of the smaller blocks are on lines not shown.
// NOTE(review): the inner loops step j by IT::size while j<N; this presumably relies
// on padded storage of A and x - confirm.
2370 template<
typename VT1
2374 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2375 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2377 typedef IntrinsicTrait<ElementType> IT;
2379 const size_t M( A.rows() );
2380 const size_t N( A.columns() );
// Blocks of eight rows.
2384 for( ; (i+8UL) <= M; i+=8UL ) {
2385 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2386 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2388 xmm1 = xmm1 + A.load(i ,j) * x1;
2389 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2390 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2391 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2392 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2393 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2394 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2395 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// The scalar is applied once per row, after the horizontal reduction.
2397 y[i ] =
sum( xmm1 ) * scalar;
2398 y[i+1UL] =
sum( xmm2 ) * scalar;
2399 y[i+2UL] =
sum( xmm3 ) * scalar;
2400 y[i+3UL] =
sum( xmm4 ) * scalar;
2401 y[i+4UL] =
sum( xmm5 ) * scalar;
2402 y[i+5UL] =
sum( xmm6 ) * scalar;
2403 y[i+6UL] =
sum( xmm7 ) * scalar;
2404 y[i+7UL] =
sum( xmm8 ) * scalar;
// Blocks of four rows.
2406 for( ; (i+4UL) <= M; i+=4UL ) {
2408 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2410 xmm1 = xmm1 + A.load(i ,j) * x1;
2411 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2412 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2413 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2415 y[i ] =
sum( xmm1 ) * scalar;
2416 y[i+1UL] =
sum( xmm2 ) * scalar;
2417 y[i+2UL] =
sum( xmm3 ) * scalar;
2418 y[i+3UL] =
sum( xmm4 ) * scalar;
// Blocks of three rows.
2420 for( ; (i+3UL) <= M; i+=3UL ) {
2422 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2424 xmm1 = xmm1 + A.load(i ,j) * x1;
2425 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2426 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2428 y[i ] =
sum( xmm1 ) * scalar;
2429 y[i+1UL] =
sum( xmm2 ) * scalar;
2430 y[i+2UL] =
sum( xmm3 ) * scalar;
// Blocks of two rows.
2432 for( ; (i+2UL) <= M; i+=2UL ) {
2434 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2436 xmm1 = xmm1 + A.load(i ,j) * x1;
2437 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2439 y[i ] =
sum( xmm1 ) * scalar;
2440 y[i+1UL] =
sum( xmm2 ) * scalar;
// Single-row step: loads the chunk of x directly instead of using x1.
2444 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2445 xmm1 = xmm1 + A.load(i,j) * x.load(j);
2447 y[i] =
sum( xmm1 ) * scalar;
// BLAS-dispatch overload for the case where no BLAS kernel applies (UseDefaultKernel):
// simply forwards to the default assignment kernel.
2466 template<
typename VT1
2470 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2471 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2473 selectDefaultAssignKernel( y, A, x, scalar );
// BLAS-dispatch overload for single precision operands: y = scalar * A * x via sgemv
// (the scalar is passed as alpha; beta = 0 discards the previous content of y).
2492 template<
typename VT1
2496 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2497 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2499 sgemv( y, A, x, scalar, 0.0F );
// BLAS-dispatch overload for double precision operands: y = scalar * A * x via dgemv
// (the scalar is passed as alpha; beta = 0 discards the previous content of y).
2519 template<
typename VT1
2523 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2524 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2526 dgemv( y, A, x, scalar, 0.0 );
// BLAS-dispatch overload for complex<float> operands: y = scalar * A * x via cgemv.
// alpha is built as complex<float>( scalar, 0 ); beta = (0,0).
2546 template<
typename VT1
2550 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2551 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2553 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS-dispatch overload for complex<double> operands: y = scalar * A * x via zgemv.
// alpha is built as complex<double>( scalar, 0 ); beta = (0,0).
2573 template<
typename VT1
2577 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2578 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2580 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of the scaled matrix/vector product to a sparse vector.
// NOTE(review): the body of this overload is on lines not shown.
2597 template<
typename VT1 >
2598 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of the scaled matrix/vector product to a dense vector.
// The operands of the wrapped multiplication are fetched first; a matrix with zero
// rows or zero columns takes a dedicated branch, otherwise the work is passed to
// selectAddAssignKernel together with the scalar.
// NOTE(review): the body of the special-case branch and the definitions of A and x
// (presumably the evaluated forms of 'left' and 'right') are on lines not shown.
2625 template<
typename VT1 >
2626 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2632 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2633 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2635 if( left.rows() == 0UL || left.columns() == 0UL ) {
2647 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatcher for the addition assignment y += A * x * scalar: chooses between
// the default kernel and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
2662 template<
typename VT1
2666 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// First term: MT is a computation that is not evaluated up front.
2668 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2670 DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2672 DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
// Non-vectorized default kernel for y += A * x * scalar (enabled when
// UseVectorizedDefaultKernel does not hold): delegates to the target vector's
// addAssign() with the scaled product expression.
2690 template<
typename VT1
2694 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2695 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2697 y.addAssign( A * x * scalar );
// Vectorized default kernel for the addition assignment y += A * x * scalar, enabled
// when UseVectorizedDefaultKernel holds for (VT1,MT1,VT2,ST2).
// Rows are handled in blocks of 8, 4, 3 and 2, followed by a single-row step. Each row
// accumulates products of A.load(row,j) with a chunk of x in an IntrinsicType
// accumulator; the reduced sum is scaled by 'scalar' and added to y.
// NOTE(review): the declaration of i, the definition of x1 and the accumulator
// declarations of the smaller blocks are on lines not shown.
// NOTE(review): the inner loops step j by IT::size while j<N; this presumably relies
// on padded storage of A and x - confirm.
2715 template<
typename VT1
2719 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2720 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2722 typedef IntrinsicTrait<ElementType> IT;
2724 const size_t M( A.rows() );
2725 const size_t N( A.columns() );
// Blocks of eight rows.
2729 for( ; (i+8UL) <= M; i+=8UL ) {
2730 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2731 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2733 xmm1 = xmm1 + A.load(i ,j) * x1;
2734 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2735 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2736 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2737 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2738 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2739 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2740 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// The scalar is applied once per row, after the horizontal reduction.
2742 y[i ] +=
sum( xmm1 ) * scalar;
2743 y[i+1UL] +=
sum( xmm2 ) * scalar;
2744 y[i+2UL] +=
sum( xmm3 ) * scalar;
2745 y[i+3UL] +=
sum( xmm4 ) * scalar;
2746 y[i+4UL] +=
sum( xmm5 ) * scalar;
2747 y[i+5UL] +=
sum( xmm6 ) * scalar;
2748 y[i+6UL] +=
sum( xmm7 ) * scalar;
2749 y[i+7UL] +=
sum( xmm8 ) * scalar;
// Blocks of four rows.
2751 for( ; (i+4UL) <= M; i+=4UL ) {
2753 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2755 xmm1 = xmm1 + A.load(i ,j) * x1;
2756 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2757 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2758 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2760 y[i ] +=
sum( xmm1 ) * scalar;
2761 y[i+1UL] +=
sum( xmm2 ) * scalar;
2762 y[i+2UL] +=
sum( xmm3 ) * scalar;
2763 y[i+3UL] +=
sum( xmm4 ) * scalar;
// Blocks of three rows.
2765 for( ; (i+3UL) <= M; i+=3UL ) {
2767 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2769 xmm1 = xmm1 + A.load(i ,j) * x1;
2770 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2771 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2773 y[i ] +=
sum( xmm1 ) * scalar;
2774 y[i+1UL] +=
sum( xmm2 ) * scalar;
2775 y[i+2UL] +=
sum( xmm3 ) * scalar;
// Blocks of two rows.
2777 for( ; (i+2UL) <= M; i+=2UL ) {
2779 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2781 xmm1 = xmm1 + A.load(i ,j) * x1;
2782 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2784 y[i ] +=
sum( xmm1 ) * scalar;
2785 y[i+1UL] +=
sum( xmm2 ) * scalar;
// Single-row step: loads the chunk of x directly instead of using x1.
2789 for(
size_t j=0UL; j<N; j+=
IT::size ) {
2790 xmm1 = xmm1 + A.load(i,j) * x.load(j);
2792 y[i] +=
sum( xmm1 ) * scalar;
// BLAS-dispatch overload for the case where no BLAS kernel applies (UseDefaultKernel):
// simply forwards to the default addition assignment kernel.
2811 template<
typename VT1
2815 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2816 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2818 selectDefaultAddAssignKernel( y, A, x, scalar );
// BLAS-dispatch overload for single precision operands: y += scalar * A * x via sgemv
// (the scalar is passed as alpha; beta = 1 keeps the previous content of y).
2837 template<
typename VT1
2841 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2842 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2844 sgemv( y, A, x, scalar, 1.0F );
// BLAS-dispatch overload for double precision operands: y += scalar * A * x via dgemv
// (the scalar is passed as alpha; beta = 1 keeps the previous content of y).
2864 template<
typename VT1
2868 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2869 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2871 dgemv( y, A, x, scalar, 1.0 );
// BLAS-dispatch overload for complex<float> operands: y += scalar * A * x via cgemv.
// alpha is built as complex<float>( scalar, 0 ); beta = (1,0).
2891 template<
typename VT1
2895 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2896 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2898 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-dispatch overload for complex<double> operands: y += scalar * A * x via zgemv.
// alpha is built as complex<double>( scalar, 0 ); beta = (1,0).
2918 template<
typename VT1
2922 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2923 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2925 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the scaled matrix/vector product to a dense vector.
// The operands of the wrapped multiplication are fetched first; a matrix with zero
// rows or zero columns takes a dedicated branch, otherwise the work is passed to
// selectSubAssignKernel together with the scalar.
// NOTE(review): the body of the special-case branch and the definitions of A and x
// (presumably the evaluated forms of 'left' and 'right') are on lines not shown.
2946 template<
typename VT1 >
2947 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2953 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2954 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2956 if( left.rows() == 0UL || left.columns() == 0UL ) {
2968 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatcher for the subtraction assignment y -= A * x * scalar: chooses between
// the default kernel and the BLAS kernel.
// NOTE(review): the second operand of the '||' and the branch keywords are on lines
// not shown; presumably the default kernel is taken when the condition holds and the
// BLAS kernel otherwise - confirm against the full source.
2983 template<
typename VT1
2987 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// First term: MT is a computation that is not evaluated up front.
2989 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2991 DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
2993 DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
// Non-vectorized default kernel for y -= A * x * scalar (enabled when
// UseVectorizedDefaultKernel does not hold): delegates to the target vector's
// subAssign() with the scaled product expression.
3011 template<
typename VT1
3015 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3016 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3018 y.subAssign( A * x * scalar );
// Vectorized default subtraction-assignment kernel. Selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
//
// Rows of A are processed in blocks of 8, 4, 3 and 2, followed by a final
// single-row step. For each block the inner loop walks the columns in steps of
// IT::size, accumulating A.load(row,j) * x.load(j) into one intrinsic
// accumulator per row; afterwards y[row] is decreased by
// sum( accumulator ) * scalar.
//
// NOTE(review): the inner loops step j by IT::size up to N without a scalar
// remainder loop — presumably the operands are padded to a multiple of
// IT::size; confirm.
// NOTE(review): the declaration of i, of x1 (presumably x.load(j)) and of the
// accumulators in the 4/3/2/1-row sections are on lines not visible in this
// listing.
3036 template<
typename VT1
3040 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3041 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3043 typedef IntrinsicTrait<ElementType> IT;
// M = number of rows (length of y), N = number of columns (length of x).
3045 const size_t M( A.rows() );
3046 const size_t N( A.columns() );
// Blocks of eight rows.
3050 for( ; (i+8UL) <= M; i+=8UL ) {
3051 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3052 for(
size_t j=0UL; j<N; j+=
IT::size ) {
3054 xmm1 = xmm1 + A.load(i ,j) * x1;
3055 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3056 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3057 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3058 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
3059 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
3060 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
3061 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Horizontal reduction of each accumulator, scaled and subtracted from y.
3063 y[i ] -=
sum( xmm1 ) * scalar;
3064 y[i+1UL] -=
sum( xmm2 ) * scalar;
3065 y[i+2UL] -=
sum( xmm3 ) * scalar;
3066 y[i+3UL] -=
sum( xmm4 ) * scalar;
3067 y[i+4UL] -=
sum( xmm5 ) * scalar;
3068 y[i+5UL] -=
sum( xmm6 ) * scalar;
3069 y[i+6UL] -=
sum( xmm7 ) * scalar;
3070 y[i+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of four rows.
3072 for( ; (i+4UL) <= M; i+=4UL ) {
3074 for(
size_t j=0UL; j<N; j+=
IT::size ) {
3076 xmm1 = xmm1 + A.load(i ,j) * x1;
3077 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3078 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3079 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
3081 y[i ] -=
sum( xmm1 ) * scalar;
3082 y[i+1UL] -=
sum( xmm2 ) * scalar;
3083 y[i+2UL] -=
sum( xmm3 ) * scalar;
3084 y[i+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of three rows.
3086 for( ; (i+3UL) <= M; i+=3UL ) {
3088 for(
size_t j=0UL; j<N; j+=
IT::size ) {
3090 xmm1 = xmm1 + A.load(i ,j) * x1;
3091 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3092 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
3094 y[i ] -=
sum( xmm1 ) * scalar;
3095 y[i+1UL] -=
sum( xmm2 ) * scalar;
3096 y[i+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of two rows.
3098 for( ; (i+2UL) <= M; i+=2UL ) {
3100 for(
size_t j=0UL; j<N; j+=
IT::size ) {
3102 xmm1 = xmm1 + A.load(i ,j) * x1;
3103 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
3105 y[i ] -=
sum( xmm1 ) * scalar;
3106 y[i+1UL] -=
sum( xmm2 ) * scalar;
// Single remaining row; x is loaded directly inside the loop. The enclosing
// condition for this section is not visible in this listing.
3110 for(
size_t j=0UL; j<N; j+=
IT::size ) {
3111 xmm1 = xmm1 + A.load(i,j) * x.load(j);
3113 y[i] -=
sum( xmm1 ) * scalar;
// Fallback of the BLAS kernel selection for the subtraction assignment:
// selected (via EnableIf) when UseDefaultKernel<VT1,MT1,VT2,ST2> holds and
// simply forwards all arguments to selectDefaultSubAssignKernel.
3132 template<
typename VT1
3136 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3137 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3139 selectDefaultSubAssignKernel( y, A, x, scalar );
// BLAS-based subtraction-assignment kernel, selected (via EnableIf) when
// UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds. Forwards to sgemv with the
// NEGATED scalar and 1.0F as the two scalar arguments.
// NOTE(review): presumably this computes y -= scalar * A * x — confirm against
// the sgemv wrapper's parameter order.
3158 template<
typename VT1
3162 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3163 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3165 sgemv( y, A, x, -scalar, 1.0F );
// BLAS-based subtraction-assignment kernel, selected (via EnableIf) when
// UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds. Forwards to dgemv with the
// NEGATED scalar and 1.0 as the two scalar arguments.
// NOTE(review): presumably this computes y -= scalar * A * x — confirm against
// the dgemv wrapper's parameter order.
3185 template<
typename VT1
3189 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3190 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3192 dgemv( y, A, x, -scalar, 1.0 );
// BLAS-based subtraction-assignment kernel, selected (via EnableIf) when
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds. The NEGATED scalar is
// widened to complex<float> with a zero imaginary part and handed to cgemv
// together with complex<float>( 1.0F, 0.0F ) as the second scalar argument.
// NOTE(review): presumably this computes y -= scalar * A * x — confirm against
// the cgemv wrapper's parameter order.
3212 template<
typename VT1
3216 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3217 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3219 cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-based subtraction-assignment kernel, selected (via EnableIf) when
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds. The NEGATED scalar is
// widened to complex<double> with a zero imaginary part and handed to zgemv
// together with complex<double>( 1.0, 0.0 ) as the second scalar argument.
// NOTE(review): presumably this computes y -= scalar * A * x — confirm against
// the zgemv wrapper's parameter order.
3239 template<
typename VT1
3243 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3244 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3246 zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Multiplication assignment of a scaled matrix/vector product
// (DVecScalarMultExpr) to a dense column vector (transpose flag 'false').
// lhs: target dense vector; rhs: the scaled multiplication expression.
// Only the signature is visible in this listing; the body is not shown.
3267 template<
typename VT1 >
3268 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// SMP assignment of a scaled matrix/vector product (DVecScalarMultExpr) to a
// dense column vector. Only participates in overload resolution when
// UseSMPAssign<VT1> holds (EnableIf).
3301 template<
typename VT1 >
3302 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3303 smpAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix (left) and vector (right) operands of the wrapped product.
3309 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3310 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Two separate special cases: a matrix without rows and a matrix without
// columns. The bodies of both branches are not visible in this listing.
3312 if( left.rows() == 0UL ) {
3315 else if( left.columns() == 0UL ) {
// SMP assignment of a scaled matrix/vector product (DVecScalarMultExpr) to a
// sparse column vector. Only participates in overload resolution when
// UseSMPAssign<VT1> holds (EnableIf).
// Only the signature is visible in this listing; the body is not shown.
3346 template<
typename VT1 >
3347 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3348 smpAssign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// SMP addition assignment of a scaled matrix/vector product
// (DVecScalarMultExpr) to a dense column vector. Only participates in overload
// resolution when UseSMPAssign<VT1> holds (EnableIf).
3377 template<
typename VT1 >
3378 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3379 smpAddAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix (left) and vector (right) operands of the wrapped product.
3385 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3386 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: the matrix has no rows or no columns. The body of this branch
// is not visible in this listing.
3388 if( left.rows() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment of a scaled matrix/vector product
// (DVecScalarMultExpr) to a dense column vector. Only participates in overload
// resolution when UseSMPAssign<VT1> holds (EnableIf).
3422 template<
typename VT1 >
3423 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3424 smpSubAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix (left) and vector (right) operands of the wrapped product.
3430 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3431 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: the matrix has no rows or no columns. The body of this branch
// is not visible in this listing.
3433 if( left.rows() == 0UL || left.columns() == 0UL ) {
// SMP multiplication assignment of a scaled matrix/vector product
// (DVecScalarMultExpr) to a dense column vector. Only participates in overload
// resolution when UseSMPAssign<VT1> holds (EnableIf).
// Only the signature is visible in this listing; the body is not shown.
3467 template<
typename VT1 >
3468 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3469 smpMultAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Free function returning a DMatDVecMultExpr<T1,T2>; it is disabled (DisableIf)
// when T1 is itself a matrix/matrix multiplication expression.
// NOTE(review): the function name and parameter list are on lines not visible
// in this listing — presumably the matrix * vector multiplication operator.
// Throws std::invalid_argument with the message below; the size check guarding
// the throw is not visible here.
3541 template<
typename T1
3543 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
3549 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
// Overload enabled (EnableIf) when T1 is a matrix/matrix multiplication
// expression; its return type is MultExprTrait<T1,T2>::Type.
// The product is re-associated: instead of evaluating (A * B) * v, the right
// matrix operand is multiplied with the vector first, and the left matrix
// operand is then applied to that result, i.e. A * ( B * v ).
// NOTE(review): the function name and parameter list are on lines not visible
// in this listing.
3577 template<
typename T1
3580 inline const typename EnableIf< IsMatMatMultExpr<T1>,
typename MultExprTrait<T1,T2>::Type >::Type
3587 return (~mat).leftOperand() * ( (~mat).
rightOperand() * vec );
3602 template<
typename MT,
typename VT >
// Trait specialization parameterized on a matrix type MT, a vector type VT and
// a boolean flag AF. Its nested Type is the multiplication expression type of
// (submatrix of const MT with flag AF) and VT.
// NOTE(review): the name of the specialized trait is on a line not visible in
// this listing; AF is presumably the alignment flag — confirm.
3620 template<
typename MT,
typename VT,
bool AF >
3625 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:262
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:351
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
DMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the DMatDVecMultExpr class.
Definition: DMatDVecMultExpr.h:289
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
Expression object for dense matrix-dense vector multiplications. The DMatDVecMultExpr class represents...
Definition: DMatDVecMultExpr.h:108
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:114
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDVecMultExpr.h:385
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDVecMultExpr.h:251
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
Header file for the DenseVector base class.
const size_t SMP_DMATDVECMULT_THRESHOLD
SMP row-major dense matrix/dense vector multiplication threshold. This threshold specifies when a row-...
Definition: Thresholds.h:322
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
DMatDVecMultExpr< MT, VT > This
Type of this DMatDVecMultExpr instance.
Definition: DMatDVecMultExpr.h:250
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
size_t size() const
Returns the current size/dimension of the vector.
Definition: DMatDVecMultExpr.h:331
const size_t end_
End of the unrolled calculation loop.
Definition: DMatDVecMultExpr.h:407
Constraint on the data type.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:119
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:116
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:259
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:268
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:118
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:341
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDVecMultExpr.h:395
Constraint on the data type.
Base class for all matrix/vector multiplication expression templates. The MatVecMultExpr class serves ...
Definition: MatVecMultExpr.h:66
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:265
Constraints on the storage order of matrix types.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDVecMultExpr.h:253
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDVecMultExpr.h:405
Header file for the EnableIf class template.
Header file for the serial shim.
Header file for the IsNumeric type trait.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: DMatDVecMultExpr.h:406
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDVecMultExpr.h:363
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:115
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDVecMultExpr.h:256
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:259
Constraint on the data type.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDVecMultExpr.h:375
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDVecMultExpr.h:252
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDVecMultExpr.h:254
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDVecMultExpr.h:255
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: DMatDVecMultExpr.h:304
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
const size_t DMATDVECMULT_THRESHOLD
Row-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:57
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:117
Constraint on the data type.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849