35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
44 #include <boost/cast.hpp>
// Expression class template TDMatDVecMultExpr, parameterized on a matrix operand type MT
// and a vector operand type VT. It derives publicly from DenseVector (passing itself as
// the derived type, with the second template argument set to false) and privately from
// MatVecMultExpr and Computation.
// NOTE(review): the remaining template parameters and the class body opener are not
// visible in this excerpt.
105 template<
typename MT
107 class TDMatDVecMultExpr :
public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
108 ,
private MatVecMultExpr
109 ,
private Computation
// Compile-time predicate consumed through EnableIf by the SMP overloads of this class.
// Its value is set whenever evaluateMatrix or evaluateVector is set; the template
// parameter T1 does not take part in the visible expression.
138 template<
typename T1 >
139 struct UseSMPAssign {
140 enum { value = ( evaluateMatrix || evaluateVector ) };
// Compile-time predicate that gates the sgemv-based selectBlas*Kernel overloads (via EnableIf).
// The visible part of the condition requires T1, T2 and T3 to all be vectorizable.
// NOTE(review): the expression starts before and continues after the visible line;
// the remaining conditions are not part of this excerpt.
151 template<
typename T1,
typename T2,
typename T3 >
152 struct UseSinglePrecisionKernel {
154 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate that gates the dgemv-based selectBlas*Kernel overloads (via EnableIf).
// The visible part of the condition requires T1, T2 and T3 to all be vectorizable.
// NOTE(review): the expression starts before and continues after the visible line;
// the remaining conditions are not part of this excerpt.
168 template<
typename T1,
typename T2,
typename T3 >
169 struct UseDoublePrecisionKernel {
171 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate that gates the cgemv-based selectBlas*Kernel overloads (via EnableIf).
185 template<
typename T1,
typename T2,
typename T3 >
186 struct UseSinglePrecisionComplexKernel {
// Element type this predicate is about.
// NOTE(review): presumably compared against the operands' element types on lines
// that are not visible here -- confirm.
187 typedef complex<float> Type;
// Visible part of the condition: all three operand types must be vectorizable.
189 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate that gates the zgemv-based selectBlas*Kernel overloads (via EnableIf).
203 template<
typename T1,
typename T2,
typename T3 >
204 struct UseDoublePrecisionComplexKernel {
// Element type this predicate is about.
// NOTE(review): presumably compared against the operands' element types on lines
// that are not visible here -- confirm.
205 typedef complex<double> Type;
// Visible part of the condition: all three operand types must be vectorizable.
207 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate selecting the non-BLAS fallback of the selectBlas*Kernel overload sets.
// It holds when BLAZE_BLAS_MODE is disabled, or when none of the four BLAS predicates
// (single, double, single-complex, double-complex) holds for the given type triple.
220 template<
typename T1,
typename T2,
typename T3 >
221 struct UseDefaultKernel {
222 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
223 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
224 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
225 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate that chooses between the two selectDefault*Kernel overloads:
// EnableIf picks the intrinsics-based version, DisableIf the scalar one.
// The visible part of the condition requires T1, T2 and T3 to all be vectorizable.
// NOTE(review): further conditions follow on lines that are not part of this excerpt.
236 template<
typename T1,
typename T2,
typename T3 >
237 struct UseVectorizedDefaultKernel {
238 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Class-level flag: the expression is vectorizable only if both operand types are.
// NOTE(review): the condition continues on lines not visible in this excerpt.
272 enum { vectorizable = MT::vectorizable && VT::vectorizable &&
// Class-level flag: the expression is SMP-assignable only if neither operand needs to be
// evaluated (evaluateMatrix / evaluateVector are both unset) and both operand types are
// themselves SMP-assignable.
278 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
279 !evaluateVector && VT::smpAssignable };
// NOTE(review): fragment of a member function whose signature is not visible; it looks
// like the per-element access of the expression -- confirm against the full file.
// Work is only done when the matrix has at least one column.
308 if(
mat_.columns() != 0UL ) {
// Columns are visited two at a time, starting at column 1 and stopping before end_.
310 for(
size_t j=1UL; j<
end_; j+=2UL ) {
// Handles the case where end_ is smaller than the column count, i.e. the stride-2
// loop did not reach the last column(s).
313 if( end_ <
mat_.columns() ) {
// Alias query: reports true if either the matrix operand or the vector operand reports
// being aliased with the given address.
// NOTE(review): the function name and signature are not visible in this excerpt; this is
// the first of two member templates with an identical visible return statement.
361 template<
typename T >
363 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
// Alias query: reports true if either the matrix operand or the vector operand reports
// being aliased with the given address.
// NOTE(review): the function name and signature are not visible in this excerpt; this is
// the second of two member templates with an identical visible return statement.
373 template<
typename T >
375 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
// Alignment query: true only if both operands report being aligned.
// NOTE(review): the enclosing function signature is not visible in this excerpt.
385 return mat_.isAligned() &&
vec_.isAligned();
// Thin wrapper around cblas_sgemv (single precision real). Per the BLAS gemv contract the
// call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
426 template<
typename VT1
429 static inline void sgemv( VT1& y,
const MT1& A,
const VT2& x,
float alpha,
float beta )
431 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion from the
// size type instead of silently truncating.
437 const int M ( numeric_cast<int>( A.rows() ) );
438 const int N ( numeric_cast<int>( A.columns() ) );
439 const int lda( numeric_cast<int>( A.spacing() ) );
441 cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
442 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_dgemv (double precision real). Per the BLAS gemv contract the
// call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
465 template<
typename VT1
468 static inline void dgemv( VT1& y,
const MT1& A,
const VT2& x,
double alpha,
double beta )
470 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion from the
// size type instead of silently truncating.
476 const int M ( numeric_cast<int>( A.rows() ) );
477 const int N ( numeric_cast<int>( A.columns() ) );
478 const int lda( numeric_cast<int>( A.spacing() ) );
480 cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
481 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_cgemv (single precision complex). Per the BLAS gemv contract
// the call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
504 template<
typename VT1
507 static inline void cgemv( VT1& y,
const MT1& A,
const VT2& x,
508 complex<float> alpha, complex<float> beta )
510 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion.
519 const int M ( numeric_cast<int>( A.rows() ) );
520 const int N ( numeric_cast<int>( A.columns() ) );
521 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed over by address, unlike in the real-valued wrappers.
523 cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
524 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Thin wrapper around cblas_zgemv (double precision complex). Per the BLAS gemv contract
// the call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
547 template<
typename VT1
550 static inline void zgemv( VT1& y,
const MT1& A,
const VT2& x,
551 complex<double> alpha, complex<double> beta )
553 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion.
562 const int M ( numeric_cast<int>( A.rows() ) );
563 const int N ( numeric_cast<int>( A.columns() ) );
564 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed over by address, unlike in the real-valued wrappers.
566 cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
567 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Entry point that assigns the product expression rhs to the target lhs.
// NOTE(review): the function name and signature are not visible in this excerpt.
586 template<
typename VT1 >
// Degenerate shapes are special-cased: a matrix without rows and a matrix without
// columns take separate branches (branch bodies are not visible here).
593 if( rhs.mat_.rows() == 0UL ) {
596 else if( rhs.mat_.columns() == 0UL ) {
// General case: dispatch on the operands A and x, which are set up on lines not visible here.
609 TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
// Chooses between the default and the BLAS kernel for the assignment y = A*x.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
625 template<
typename VT1
628 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
630 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
632 TDMatDVecMultExpr::selectDefaultAssignKernel( y, A, x );
634 TDMatDVecMultExpr::selectBlasAssignKernel( y, A, x );
// Scalar (non-vectorized) default kernel for the assignment y = A*x; selected when
// UseVectorizedDefaultKernel does not hold for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the closing braces are not visible.
653 template<
typename VT1
656 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
657 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
659 const size_t M( A.rows() );
660 const size_t N( A.columns() );
// M with its lowest bit cleared, i.e. the largest even number not exceeding M.
663 const size_t iend( M &
size_t(-2) );
// Column 0 initializes every element of y, overwriting its previous contents.
665 for(
size_t i=0UL; i<M; ++i ) {
666 y[i] = x[0UL] * A(i,0UL);
// The remaining columns are accumulated into y, two rows per inner iteration.
668 for(
size_t j=1UL; j<N; ++j ) {
669 for(
size_t i=0UL; i<iend; i+=2UL ) {
670 y[i ] += x[j] * A(i ,j);
671 y[i+1UL] += x[j] * A(i+1UL,j);
// Leftover row for odd M.
// NOTE(review): presumably guarded by a check such as iend < M on a line not visible here.
674 y[iend] += x[j] * A(iend,j);
// Intrinsics-based default kernel for the assignment y = A*x; selected when
// UseVectorizedDefaultKernel holds for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the loops over the row index i, the definition of x1 and the stores to y
// are not visible in this excerpt; only the accumulation loops over the columns are shown.
695 template<
typename VT1
698 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
699 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
701 typedef IntrinsicTrait<ElementType> IT;
703 const size_t M( A.rows() );
704 const size_t N( A.columns() );
// Eight accumulators: one pass over the columns covers 8*IT::size rows.
709 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
710 for(
size_t j=0UL; j<N; ++j ) {
712 xmm1 = xmm1 + A.load(i ,j) * x1;
713 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
714 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
715 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
716 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
717 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
718 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
719 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
// Four accumulators: 4*IT::size rows per pass.
732 for(
size_t j=0UL; j<N; ++j ) {
734 xmm1 = xmm1 + A.load(i ,j) * x1;
735 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
736 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
737 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
// Three accumulators: 3*IT::size rows per pass.
746 for(
size_t j=0UL; j<N; ++j ) {
748 xmm1 = xmm1 + A.load(i ,j) * x1;
749 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
750 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
// Two accumulators: 2*IT::size rows per pass.
758 for(
size_t j=0UL; j<N; ++j ) {
760 xmm1 = xmm1 + A.load(i ,j) * x1;
761 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
// Single accumulator: IT::size rows per pass; x[j] is passed through set() inline.
768 for(
size_t j=0UL; j<N; ++j ) {
769 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
// Fallback of the BLAS overload set for y = A*x: chosen when UseDefaultKernel holds and
// simply forwards to the default kernel.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
791 template<
typename VT1
794 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
795 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
797 selectDefaultAssignKernel( y, A, x );
// Single precision BLAS kernel for y = A*x: chosen when UseSinglePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
817 template<
typename VT1
820 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
821 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = 1, beta = 0: the previous contents of y are discarded.
823 sgemv( y, A, x, 1.0F, 0.0F );
// Double precision BLAS kernel for y = A*x: chosen when UseDoublePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
844 template<
typename VT1
847 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
848 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = 1, beta = 0: the previous contents of y are discarded.
850 dgemv( y, A, x, 1.0, 0.0 );
// Single precision complex BLAS kernel for y = A*x: chosen when
// UseSinglePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
871 template<
typename VT1
874 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
875 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (1,0), beta = (0,0): the previous contents of y are discarded.
877 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// Double precision complex BLAS kernel for y = A*x: chosen when
// UseDoublePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
898 template<
typename VT1
901 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
902 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (1,0), beta = (0,0): the previous contents of y are discarded.
904 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
923 template<
typename VT1 >
// Entry point that adds the product expression rhs to the target lhs.
// NOTE(review): the function name and signature are not visible in this excerpt.
953 template<
typename VT1 >
// A matrix without rows or without columns is special-cased (branch body not visible here).
960 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// General case: dispatch on the operands A and x, which are set up on lines not visible here.
972 TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
// Chooses between the default and the BLAS kernel for the addition assignment y += A*x.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
988 template<
typename VT1
991 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
993 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
995 TDMatDVecMultExpr::selectDefaultAddAssignKernel( y, A, x );
997 TDMatDVecMultExpr::selectBlasAddAssignKernel( y, A, x );
// Scalar (non-vectorized) default kernel for the addition assignment y += A*x; selected
// when UseVectorizedDefaultKernel does not hold for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the closing braces are not visible.
1016 template<
typename VT1
1019 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1020 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1022 const size_t M( A.rows() );
1023 const size_t N( A.columns() );
// M with its lowest bit cleared, i.e. the largest even number not exceeding M.
1026 const size_t iend( M &
size_t(-2) );
// Unlike the plain assignment kernel, all columns (starting at 0) are accumulated into
// the existing contents of y, two rows per inner iteration.
1028 for(
size_t j=0UL; j<N; ++j ) {
1029 for(
size_t i=0UL; i<iend; i+=2UL ) {
1030 y[i ] += x[j] * A(i ,j);
1031 y[i+1UL] += x[j] * A(i+1UL,j);
// Leftover row for odd M.
// NOTE(review): presumably guarded by a check such as iend < M on a line not visible here.
1034 y[iend] += x[j] * A(iend,j);
// Intrinsics-based default kernel for the addition assignment y += A*x; selected when
// UseVectorizedDefaultKernel holds for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the loops over the row index i, the declaration/initialization of the
// xmm accumulators, the definition of x1 and most stores are not visible in this excerpt.
1055 template<
typename VT1
1058 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1059 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1061 typedef IntrinsicTrait<ElementType> IT;
1063 const size_t M( A.rows() );
1064 const size_t N( A.columns() );
// Eight accumulators: 8*IT::size rows per pass over the columns.
1077 for(
size_t j=0UL; j<N; ++j ) {
1079 xmm1 = xmm1 + A.load(i ,j) * x1;
1080 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1081 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1082 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1083 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1084 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1085 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1086 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
// The accumulator is written back as-is.
// NOTE(review): presumably the accumulators were seeded from y beforehand -- confirm.
1088 y.store( i , xmm1 );
// Four accumulators: 4*IT::size rows per pass.
1102 for(
size_t j=0UL; j<N; ++j ) {
1104 xmm1 = xmm1 + A.load(i ,j) * x1;
1105 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1106 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1107 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1109 y.store( i , xmm1 );
// Three accumulators: 3*IT::size rows per pass.
1118 for(
size_t j=0UL; j<N; ++j ) {
1120 xmm1 = xmm1 + A.load(i ,j) * x1;
1121 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1122 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1124 y.store( i , xmm1 );
// Two accumulators: 2*IT::size rows per pass.
1131 for(
size_t j=0UL; j<N; ++j ) {
1133 xmm1 = xmm1 + A.load(i ,j) * x1;
1134 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
1136 y.store( i , xmm1 );
// Single accumulator: IT::size rows per pass; x[j] is passed through set() inline.
1141 for(
size_t j=0UL; j<N; ++j ) {
1142 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
// Fallback of the BLAS overload set for y += A*x: chosen when UseDefaultKernel holds and
// simply forwards to the default addition kernel.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1164 template<
typename VT1
1167 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1168 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1170 selectDefaultAddAssignKernel( y, A, x );
// Single precision BLAS kernel for y += A*x: chosen when UseSinglePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1190 template<
typename VT1
1193 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1194 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = 1, beta = 1: the product is added to the previous contents of y.
1196 sgemv( y, A, x, 1.0F, 1.0F );
// Double precision BLAS kernel for y += A*x: chosen when UseDoublePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1217 template<
typename VT1
1220 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1221 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = 1, beta = 1: the product is added to the previous contents of y.
1223 dgemv( y, A, x, 1.0, 1.0 );
// Single precision complex BLAS kernel for y += A*x: chosen when
// UseSinglePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1244 template<
typename VT1
1247 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1248 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (1,0), beta = (1,0): the product is added to the previous contents of y.
1250 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double precision complex BLAS kernel for y += A*x: chosen when
// UseDoublePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1271 template<
typename VT1
1274 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1275 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (1,0), beta = (1,0): the product is added to the previous contents of y.
1277 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Entry point that subtracts the product expression rhs from the target lhs.
// NOTE(review): the function name and signature are not visible in this excerpt.
1300 template<
typename VT1 >
// A matrix without rows or without columns is special-cased (branch body not visible here).
1307 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// General case: dispatch on the operands A and x, which are set up on lines not visible here.
1319 TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
// Chooses between the default and the BLAS kernel for the subtraction assignment y -= A*x.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
1335 template<
typename VT1
1338 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
1340 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1342 TDMatDVecMultExpr::selectDefaultSubAssignKernel( y, A, x );
1344 TDMatDVecMultExpr::selectBlasSubAssignKernel( y, A, x );
// Scalar (non-vectorized) default kernel for the subtraction assignment y -= A*x; selected
// when UseVectorizedDefaultKernel does not hold for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list and the closing braces are not visible.
1363 template<
typename VT1
1366 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1367 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1369 const size_t M( A.rows() );
1370 const size_t N( A.columns() );
// M with its lowest bit cleared, i.e. the largest even number not exceeding M.
1373 const size_t iend( M &
size_t(-2) );
// Every column's contribution is subtracted from the existing contents of y,
// two rows per inner iteration.
1375 for(
size_t j=0UL; j<N; ++j ) {
1376 for(
size_t i=0UL; i<iend; i+=2UL ) {
1377 y[i ] -= x[j] * A(i ,j);
1378 y[i+1UL] -= x[j] * A(i+1UL,j);
// Leftover row for odd M.
// NOTE(review): presumably guarded by a check such as iend < M on a line not visible here.
1381 y[iend] -= x[j] * A(iend,j);
// Intrinsics-based default kernel for the subtraction assignment y -= A*x; selected when
// UseVectorizedDefaultKernel holds for the operand types.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the loops over the row index i, the declaration/initialization of the
// xmm accumulators, the definition of x1 and most stores are not visible in this excerpt.
1402 template<
typename VT1
1405 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1406 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1408 typedef IntrinsicTrait<ElementType> IT;
1410 const size_t M( A.rows() );
1411 const size_t N( A.columns() );
// Eight accumulators: 8*IT::size rows per pass; the products are subtracted.
1424 for(
size_t j=0UL; j<N; ++j ) {
1426 xmm1 = xmm1 - A.load(i ,j) * x1;
1427 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1428 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1429 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1430 xmm5 = xmm5 - A.load(i+
IT::size*4UL,j) * x1;
1431 xmm6 = xmm6 - A.load(i+
IT::size*5UL,j) * x1;
1432 xmm7 = xmm7 - A.load(i+
IT::size*6UL,j) * x1;
1433 xmm8 = xmm8 - A.load(i+
IT::size*7UL,j) * x1;
// The accumulator is written back as-is.
// NOTE(review): presumably the accumulators were seeded from y beforehand -- confirm.
1435 y.store( i , xmm1 );
// Four accumulators: 4*IT::size rows per pass.
1449 for(
size_t j=0UL; j<N; ++j ) {
1451 xmm1 = xmm1 - A.load(i ,j) * x1;
1452 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1453 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1454 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1456 y.store( i , xmm1 );
// Three accumulators: 3*IT::size rows per pass.
1465 for(
size_t j=0UL; j<N; ++j ) {
1467 xmm1 = xmm1 - A.load(i ,j) * x1;
1468 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1469 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1471 y.store( i , xmm1 );
// Two accumulators: 2*IT::size rows per pass.
1478 for(
size_t j=0UL; j<N; ++j ) {
1480 xmm1 = xmm1 - A.load(i ,j) * x1;
1481 xmm2 = xmm2 - A.load(i+
IT::size,j) * x1;
1483 y.store( i , xmm1 );
// Single accumulator: IT::size rows per pass; x[j] is passed through set() inline.
1488 for(
size_t j=0UL; j<N; ++j ) {
1489 xmm1 = xmm1 - A.load(i,j) *
set( x[j] );
// Fallback of the BLAS overload set for y -= A*x: chosen when UseDefaultKernel holds and
// simply forwards to the default subtraction kernel.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1511 template<
typename VT1
1514 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1515 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1517 selectDefaultSubAssignKernel( y, A, x );
// Single precision BLAS kernel for y -= A*x: chosen when UseSinglePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1537 template<
typename VT1
1540 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1541 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = -1, beta = 1: the product is subtracted from the previous contents of y.
1543 sgemv( y, A, x, -1.0F, 1.0F );
// Double precision BLAS kernel for y -= A*x: chosen when UseDoublePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1564 template<
typename VT1
1567 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1568 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = -1, beta = 1: the product is subtracted from the previous contents of y.
1570 dgemv( y, A, x, -1.0, 1.0 );
// Single precision complex BLAS kernel for y -= A*x: chosen when
// UseSinglePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1591 template<
typename VT1
1594 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1595 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (-1,0), beta = (1,0): the product is subtracted from the previous contents of y.
1597 cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double precision complex BLAS kernel for y -= A*x: chosen when
// UseDoublePrecisionComplexKernel holds.
//   y - target vector, A - matrix operand, x - vector operand
// NOTE(review): the template parameter list is only partially visible in this excerpt.
1618 template<
typename VT1
1621 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1622 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// alpha = (-1,0), beta = (1,0): the product is subtracted from the previous contents of y.
1624 zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1647 template<
typename VT1 >
// Friend overload that only participates in overload resolution when UseSMPAssign<VT1> holds.
// NOTE(review): the function name and parameter list are not visible in this excerpt;
// by its position it looks like the SMP counterpart of the dense assignment -- confirm.
1683 template<
typename VT1 >
1684 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Degenerate shapes are special-cased: a matrix without rows and a matrix without
// columns take separate branches (branch bodies are not visible here).
1691 if( rhs.mat_.rows() == 0UL ) {
1694 else if( rhs.mat_.columns() == 0UL ) {
// Friend overload that only participates in overload resolution when UseSMPAssign<VT1> holds.
// NOTE(review): the function name, parameter list and body are not visible in this excerpt.
1727 template<
typename VT1 >
1728 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Friend overload that only participates in overload resolution when UseSMPAssign<VT1> holds.
// NOTE(review): the function name and parameter list are not visible in this excerpt.
1760 template<
typename VT1 >
1761 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// A matrix without rows or without columns is special-cased (branch body not visible here).
1768 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Friend overload that only participates in overload resolution when UseSMPAssign<VT1> holds.
// NOTE(review): the function name and parameter list are not visible in this excerpt.
1804 template<
typename VT1 >
1805 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// A matrix without rows or without columns is special-cased (branch body not visible here).
1812 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Friend overload that only participates in overload resolution when UseSMPAssign<VT1> holds.
// NOTE(review): the function name, parameter list and body are not visible in this excerpt.
1848 template<
typename VT1 >
1849 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Specialization of DVecScalarMultExpr for the case where the scaled vector is itself a
// TDMatDVecMultExpr<MT,VT> and the scalar has type ST. It derives publicly from
// DenseVector (passing itself as the derived type) and privately from VecScalarMultExpr
// and Computation.
// NOTE(review): the remaining template parameters and the class name line are not
// visible in this excerpt.
1900 template<
typename MT
1904 :
public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
1905 ,
private VecScalarMultExpr
1906 ,
private Computation
// Shorthand for the wrapped matrix/vector product expression.
1910 typedef TDMatDVecMultExpr<MT,VT> MVM;
// The matrix operand is evaluated up front if it is a computation whose element type MET
// equals VET and is BLAS compatible, or if it requires evaluation.
1922 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1923 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// The vector operand is evaluated up front if it is a computation or requires evaluation.
1928 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// Compile-time predicate consumed through EnableIf by the smp* overloads of this class.
// Its value is set whenever evaluateMatrix or evaluateVector is set; the template
// parameter T1 does not take part in the expression.
1936 template<
typename T1 >
1937 struct UseSMPAssign {
1938 enum { value = ( evaluateMatrix || evaluateVector ) };
// Compile-time predicate that gates the sgemv-based selectBlas*Kernel overloads (via EnableIf).
// Visible conditions: T1, T2 and T3 are all vectorizable, their element types all satisfy
// IsFloat, and the scalar type T4 is not complex.
// NOTE(review): the start of the expression is on a line not visible in this excerpt.
1947 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1948 struct UseSinglePrecisionKernel {
1950 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1951 IsFloat<typename T1::ElementType>::value &&
1952 IsFloat<typename T2::ElementType>::value &&
1953 IsFloat<typename T3::ElementType>::value &&
1954 !IsComplex<T4>::value };
// Compile-time predicate that gates the dgemv-based selectBlas*Kernel overloads (via EnableIf).
// Visible conditions: T1, T2 and T3 are all vectorizable, their element types all satisfy
// IsDouble, and the scalar type T4 is not complex.
// NOTE(review): the start of the expression is on a line not visible in this excerpt.
1963 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1964 struct UseDoublePrecisionKernel {
1966 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1967 IsDouble<typename T1::ElementType>::value &&
1968 IsDouble<typename T2::ElementType>::value &&
1969 IsDouble<typename T3::ElementType>::value &&
1970 !IsComplex<T4>::value };
// Compile-time predicate that gates the cgemv-based selectBlas*Kernel overloads (via EnableIf).
// Unlike the real-valued predicates it takes no scalar type parameter.
// NOTE(review): the start of the expression is on a line not visible in this excerpt.
1979 template<
typename T1,
typename T2,
typename T3 >
1980 struct UseSinglePrecisionComplexKernel {
// Element type all three operands must share.
1981 typedef complex<float> Type;
// Visible conditions: all operands vectorizable and all element types equal to Type.
1983 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1984 IsSame<typename T1::ElementType,Type>::value &&
1985 IsSame<typename T2::ElementType,Type>::value &&
1986 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate that gates the zgemv-based selectBlas*Kernel overloads (via EnableIf).
// Unlike the real-valued predicates it takes no scalar type parameter.
// NOTE(review): the start of the expression is on a line not visible in this excerpt.
1995 template<
typename T1,
typename T2,
typename T3 >
1996 struct UseDoublePrecisionComplexKernel {
// Element type all three operands must share.
1997 typedef complex<double> Type;
// Visible conditions: all operands vectorizable and all element types equal to Type.
1999 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2000 IsSame<typename T1::ElementType,Type>::value &&
2001 IsSame<typename T2::ElementType,Type>::value &&
2002 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback of the selectBlas*Kernel overload sets.
// It holds when BLAZE_BLAS_MODE is disabled, or when none of the four BLAS predicates
// holds; the scalar type T4 is only forwarded to the two real-valued predicates.
2010 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2011 struct UseDefaultKernel {
2012 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2013 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2014 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2015 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate that chooses between the two selectDefault*Kernel overloads:
// EnableIf picks the intrinsics-based version, DisableIf the scalar one.
// It holds when T1, T2 and T3 are all vectorizable, the element types of T1, T2, T3 and
// the scalar type T4 are all the same type, and IntrinsicTrait reports both addition and
// multiplication for that element type.
2024 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2025 struct UseVectorizedDefaultKernel {
2026 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2027 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2028 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2029 IsSame<typename T1::ElementType,T4>::value &&
2030 IntrinsicTrait<typename T1::ElementType>::addition &&
2031 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this specialization itself.
2037 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// Result type: what MultTrait yields for RES combined with the scalar type ST.
2038 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic (SIMD) type associated with ElementType; used by the vectorized kernels.
2041 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left-hand operand of the scaling is the (const) matrix/vector product expression.
2046 typedef const TDMatDVecMultExpr<MT,VT>
LeftOperand;
// Working type for the matrix operand: const MRT if it has to be evaluated, MCT otherwise.
2052 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
LT;
// Working type for the vector operand: const VRT if it has to be evaluated, VCT otherwise.
2055 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
RT;
// Class-level flag: the scaled product is vectorizable if both operand types are, the
// matrix element type MET equals both VET and the scalar type ST, and IntrinsicTrait
// reports addition and multiplication for MET.
2060 enum { vectorizable = MT::vectorizable && VT::vectorizable &&
2061 IsSame<MET,VET>::value &&
2062 IsSame<MET,ST>::value &&
2063 IntrinsicTrait<MET>::addition &&
2064 IntrinsicTrait<MET>::multiplication };
// Class-level flag: SMP-assignable only if neither operand needs to be evaluated and
// both operand types are themselves SMP-assignable.
2067 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2068 !evaluateVector && VT::smpAssignable };
// Constructor taking the matrix/vector product expression and the scalar it is scaled by.
// Declared explicit, so it is not used for implicit conversions.
// NOTE(review): the initializer list and body are not visible in this excerpt.
2077 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
// Element access: the element of the wrapped product at the given index, times the scalar.
// NOTE(review): the enclosing function signature is not visible in this excerpt.
2091 return vector_[index] * scalar_;
// Returns the size of the expression, which is the size of the wrapped product vector.
2100 inline size_t size()
const {
2101 return vector_.size();
// Reports whether the expression can alias with the given address; the question is
// forwarded unchanged to the wrapped product expression.
//   alias - address to check against
2131 template<
typename T >
2132 inline bool canAlias(
const T* alias )
const {
2133 return vector_.canAlias( alias );
// Reports whether the expression is aliased with the given address; the question is
// forwarded unchanged to the wrapped product expression.
//   alias - address to check against
2143 template<
typename T >
2144 inline bool isAliased(
const T* alias )
const {
2145 return vector_.isAliased( alias );
// Alignment query, forwarded unchanged to the wrapped product expression.
// NOTE(review): the enclosing function signature is not visible in this excerpt.
2155 return vector_.isAligned();
// NOTE(review): fragment of a member function whose signature is not visible in this excerpt.
// Fetches the matrix operand of the wrapped product for inspection.
2165 typename MVM::LeftOperand A( vector_.leftOperand() );
// First term of a larger condition: the matrix operand is a computation that is not
// evaluated up front.
2167 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Thin wrapper around cblas_sgemv (single precision real). Per the BLAS gemv contract the
// call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2195 template<
typename VT1
2198 static inline void sgemv( VT1& y,
const MT1& A,
const VT2& x,
float alpha,
float beta )
2200 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion from the
// size type instead of silently truncating.
2206 const int M ( numeric_cast<int>( A.rows() ) );
2207 const int N ( numeric_cast<int>( A.columns() ) );
2208 const int lda( numeric_cast<int>( A.spacing() ) );
2210 cblas_sgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
2211 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_dgemv (double precision real). Per the BLAS gemv contract the
// call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2232 template<
typename VT1
2235 static inline void dgemv( VT1& y,
const MT1& A,
const VT2& x,
double alpha,
double beta )
2237 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion from the
// size type instead of silently truncating.
2243 const int M ( numeric_cast<int>( A.rows() ) );
2244 const int N ( numeric_cast<int>( A.columns() ) );
2245 const int lda( numeric_cast<int>( A.spacing() ) );
2247 cblas_dgemv( CblasColMajor, CblasNoTrans, M, N, alpha,
2248 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Thin wrapper around cblas_cgemv (single precision complex). Per the BLAS gemv contract
// the call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2269 template<
typename VT1
2272 static inline void cgemv( VT1& y,
const MT1& A,
const VT2& x,
2273 complex<float> alpha, complex<float> beta )
2275 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion.
2284 const int M ( numeric_cast<int>( A.rows() ) );
2285 const int N ( numeric_cast<int>( A.columns() ) );
2286 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed over by address, unlike in the real-valued wrappers.
2288 cblas_cgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2289 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Thin wrapper around cblas_zgemv (double precision complex). Per the BLAS gemv contract
// the call computes y = alpha*A*x + beta*y for a column-major, non-transposed matrix A.
//   y     - target vector, accessed through y.data() with unit stride
//   A     - matrix operand; rows(), columns() and spacing() supply M, N and lda
//   x     - vector operand, accessed through x.data() with unit stride
//   alpha - scaling factor for A*x
//   beta  - scaling factor for the previous contents of y
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2310 template<
typename VT1
2313 static inline void zgemv( VT1& y,
const MT1& A,
const VT2& x,
2314 complex<double> alpha, complex<double> beta )
2316 using boost::numeric_cast;
// BLAS takes int dimensions; numeric_cast performs a range-checked conversion.
2325 const int M ( numeric_cast<int>( A.rows() ) );
2326 const int N ( numeric_cast<int>( A.columns() ) );
2327 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed over by address, unlike in the real-valued wrappers.
2329 cblas_zgemv( CblasColMajor, CblasNoTrans, M, N, &alpha,
2330 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assigns the scaled matrix/vector product rhs to the dense vector lhs.
//   lhs - target dense vector
//   rhs - scaled product expression to be assigned
2347 template<
typename VT1 >
2348 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the two operands of the wrapped product.
2354 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2355 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Degenerate shapes are special-cased: a matrix without rows and a matrix without
// columns take separate branches (branch bodies are not visible here).
2357 if( left.rows() == 0UL ) {
2360 else if( left.columns() == 0UL ) {
// General case: dispatch on A and x (set up on lines not visible here), forwarding the scalar.
2373 DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Chooses between the default and the BLAS kernel for the scaled assignment y = scalar*A*x.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
2388 template<
typename VT1
2392 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
2394 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2396 DVecScalarMultExpr::selectDefaultAssignKernel( y, A, x, scalar );
2398 DVecScalarMultExpr::selectBlasAssignKernel( y, A, x, scalar );
// Scalar (non-vectorized) default kernel for the scaled assignment; selected when
// UseVectorizedDefaultKernel does not hold for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list, the closing braces and the body of the
// final loop are not visible in this excerpt.
2416 template<
typename VT1
2420 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2421 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2423 const size_t M( A.rows() );
2424 const size_t N( A.columns() );
// M with its lowest bit cleared, i.e. the largest even number not exceeding M.
2427 const size_t iend( M &
size_t(-2) );
// Column 0 initializes every element of y, overwriting its previous contents.
2429 for(
size_t i=0UL; i<M; ++i ) {
2430 y[i] = x[0UL] * A(i,0UL);
// The remaining columns are accumulated into y, two rows per inner iteration.
2432 for(
size_t j=1UL; j<N; ++j ) {
2433 for(
size_t i=0UL; i<iend; i+=2UL ) {
2434 y[i ] += x[j] * A(i ,j);
2435 y[i+1UL] += x[j] * A(i+1UL,j);
// Leftover row for odd M.
// NOTE(review): presumably guarded by a check such as iend < M on a line not visible here.
2438 y[iend] += x[j] * A(iend,j);
// Final pass over all elements of y.
// NOTE(review): presumably applies the scalar to each element; the loop body is not visible.
2441 for(
size_t i=0UL; i<M; ++i ) {
// Intrinsics-based default kernel for the scaled assignment; selected when
// UseVectorizedDefaultKernel holds for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the loops over the row index i and the definitions of x1 and factor are
// not visible in this excerpt; factor is presumably the scalar in intrinsic form -- confirm.
2461 template<
typename VT1
2465 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2466 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2468 typedef IntrinsicTrait<ElementType> IT;
2470 const size_t M( A.rows() );
2471 const size_t N( A.columns() );
// Eight accumulators: 8*IT::size rows per pass over the columns.
2478 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2479 for(
size_t j=0UL; j<N; ++j ) {
2481 xmm1 = xmm1 + A.load(i ,j) * x1;
2482 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2483 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2484 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2485 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
2486 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
2487 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
2488 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
// Scaling happens once per accumulator at store time rather than inside the column loop.
2490 y.store( i , xmm1*factor );
2491 y.store( i+
IT::size , xmm2*factor );
2492 y.store( i+
IT::size*2UL, xmm3*factor );
2493 y.store( i+
IT::size*3UL, xmm4*factor );
2494 y.store( i+
IT::size*4UL, xmm5*factor );
2495 y.store( i+
IT::size*5UL, xmm6*factor );
2496 y.store( i+
IT::size*6UL, xmm7*factor );
2497 y.store( i+
IT::size*7UL, xmm8*factor );
// Four accumulators: 4*IT::size rows per pass.
2501 for(
size_t j=0UL; j<N; ++j ) {
2503 xmm1 = xmm1 + A.load(i ,j) * x1;
2504 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2505 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2506 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2508 y.store( i , xmm1*factor );
2509 y.store( i+
IT::size , xmm2*factor );
2510 y.store( i+
IT::size*2UL, xmm3*factor );
2511 y.store( i+
IT::size*3UL, xmm4*factor );
// Three accumulators: 3*IT::size rows per pass.
2515 for(
size_t j=0UL; j<N; ++j ) {
2517 xmm1 = xmm1 + A.load(i ,j) * x1;
2518 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2519 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2521 y.store( i , xmm1*factor );
2522 y.store( i+
IT::size , xmm2*factor );
2523 y.store( i+
IT::size*2UL, xmm3*factor );
// Two accumulators: 2*IT::size rows per pass.
2527 for(
size_t j=0UL; j<N; ++j ) {
2529 xmm1 = xmm1 + A.load(i ,j) * x1;
2530 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
2532 y.store( i , xmm1*factor );
2533 y.store( i+
IT::size, xmm2*factor );
// Single accumulator: IT::size rows per pass.
2537 for(
size_t j=0UL; j<N; ++j ) {
2539 xmm1 = xmm1 + A.load(i,j) * x1;
2541 y.store( i, xmm1*factor );
// Fallback of the BLAS overload set for the scaled assignment: chosen when
// UseDefaultKernel holds and simply forwards to the default kernel.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2560 template<
typename VT1
2564 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2565 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2567 selectDefaultAssignKernel( y, A, x, scalar );
// Single precision BLAS kernel for the scaled assignment: chosen when
// UseSinglePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2586 template<
typename VT1
2590 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2591 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = scalar, beta = 0: the scaling is folded into the gemv call and the previous
// contents of y are discarded.
2593 sgemv( y, A, x, scalar, 0.0F );
// Double precision BLAS kernel for the scaled assignment: chosen when
// UseDoublePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2613 template<
typename VT1
2617 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2618 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = scalar, beta = 0: the scaling is folded into the gemv call and the previous
// contents of y are discarded.
2620 dgemv( y, A, x, scalar, 0.0 );
// Single precision complex BLAS kernel for the scaled assignment: chosen when
// UseSinglePrecisionComplexKernel holds (a predicate that does not look at the scalar type).
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2640 template<
typename VT1
2644 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2645 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = (scalar,0), beta = (0,0): scalar is used as the real part of alpha.
// NOTE(review): this construction assumes a real-valued scalar; verify how a complex ST2
// is meant to be handled.
2647 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// Double precision complex BLAS kernel for the scaled assignment: chosen when
// UseDoublePrecisionComplexKernel holds (a predicate that does not look at the scalar type).
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2667 template<
typename VT1
2671 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2672 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = (scalar,0), beta = (0,0): scalar is used as the real part of alpha.
// NOTE(review): this construction assumes a real-valued scalar; verify how a complex ST2
// is meant to be handled.
2674 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Overload of assign() for a sparse vector target.
//   lhs - target sparse vector
//   rhs - scaled product expression to be assigned
// NOTE(review): the body is not visible in this excerpt.
2691 template<
typename VT1 >
2692 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Adds the scaled matrix/vector product rhs to the dense vector lhs.
//   lhs - target dense vector
//   rhs - scaled product expression to be added
2719 template<
typename VT1 >
2720 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the two operands of the wrapped product.
2726 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2727 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows or without columns is special-cased (branch body not visible here).
2729 if( left.rows() == 0UL || left.columns() == 0UL ) {
// General case: dispatch on A and x (set up on lines not visible here), forwarding the scalar.
2741 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Chooses between the default and the BLAS kernel for the scaled addition assignment
// y += scalar*A*x.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
2756 template<
typename VT1
2760 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
2762 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2764 DVecScalarMultExpr::selectDefaultAddAssignKernel( y, A, x, scalar );
2766 DVecScalarMultExpr::selectBlasAddAssignKernel( y, A, x, scalar );
// Non-vectorized default kernel for the scaled addition assignment; selected when
// UseVectorizedDefaultKernel does not hold for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2784 template<
typename VT1
2788 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2789 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// No hand-written loop here: the scaled product is rebuilt as an expression and handed
// to y's own addAssign.
2791 y.addAssign( A * x * scalar );
// Intrinsics-based default kernel for the scaled addition assignment; selected when
// UseVectorizedDefaultKernel holds for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the loops over the row index i, the definitions of x1 and factor and
// most stores are not visible in this excerpt; factor is presumably the scalar in
// intrinsic form -- confirm.
2809 template<
typename VT1
2813 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2814 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2816 typedef IntrinsicTrait<ElementType> IT;
2818 const size_t M( A.rows() );
2819 const size_t N( A.columns() );
// Eight accumulators: 8*IT::size rows per pass over the columns.
2826 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2827 for(
size_t j=0UL; j<N; ++j ) {
2829 xmm1 = xmm1 + A.load(i ,j) * x1;
2830 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2831 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2832 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2833 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
2834 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
2835 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
2836 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
// The scaled accumulator is added to the value currently stored in y.
2838 y.store( i , y.load(i ) + xmm1*factor );
// Four accumulators: 4*IT::size rows per pass.
2849 for(
size_t j=0UL; j<N; ++j ) {
2851 xmm1 = xmm1 + A.load(i ,j) * x1;
2852 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2853 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2854 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2856 y.store( i , y.load(i ) + xmm1*factor );
// Three accumulators: 3*IT::size rows per pass.
2863 for(
size_t j=0UL; j<N; ++j ) {
2865 xmm1 = xmm1 + A.load(i ,j) * x1;
2866 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2867 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2869 y.store( i , y.load(i ) + xmm1*factor );
// Two accumulators: 2*IT::size rows per pass.
2875 for(
size_t j=0UL; j<N; ++j ) {
2877 xmm1 = xmm1 + A.load(i ,j) * x1;
2878 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
2880 y.store( i , y.load(i ) + xmm1*factor );
// Single accumulator: IT::size rows per pass; x[j] is passed through set() inline.
2885 for(
size_t j=0UL; j<N; ++j ) {
2886 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
2888 y.store( i, y.load(i) + xmm1*factor );
// Fallback of the BLAS overload set for the scaled addition assignment: chosen when
// UseDefaultKernel holds and simply forwards to the default addition kernel.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2907 template<
typename VT1
2911 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2912 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2914 selectDefaultAddAssignKernel( y, A, x, scalar );
// Single precision BLAS kernel for the scaled addition assignment: chosen when
// UseSinglePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2933 template<
typename VT1
2937 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2938 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = scalar, beta = 1: the scaled product is added to the previous contents of y.
2940 sgemv( y, A, x, scalar, 1.0F );
// Double precision BLAS kernel for the scaled addition assignment: chosen when
// UseDoublePrecisionKernel holds.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2960 template<
typename VT1
2964 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2965 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = scalar, beta = 1: the scaled product is added to the previous contents of y.
2967 dgemv( y, A, x, scalar, 1.0 );
// Single precision complex BLAS kernel for the scaled addition assignment: chosen when
// UseSinglePrecisionComplexKernel holds (a predicate that does not look at the scalar type).
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
2987 template<
typename VT1
2991 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2992 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = (scalar,0), beta = (1,0): scalar is used as the real part of alpha.
// NOTE(review): this construction assumes a real-valued scalar; verify how a complex ST2
// is meant to be handled.
2994 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double precision complex BLAS kernel for the scaled addition assignment: chosen when
// UseDoublePrecisionComplexKernel holds (a predicate that does not look at the scalar type).
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
3014 template<
typename VT1
3018 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3019 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// alpha = (scalar,0), beta = (1,0): scalar is used as the real part of alpha.
// NOTE(review): this construction assumes a real-valued scalar; verify how a complex ST2
// is meant to be handled.
3021 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtracts the scaled matrix/vector product rhs from the dense vector lhs.
//   lhs - target dense vector
//   rhs - scaled product expression to be subtracted
3042 template<
typename VT1 >
3043 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the two operands of the wrapped product.
3049 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3050 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows or without columns is special-cased (branch body not visible here).
3052 if( left.rows() == 0UL || left.columns() == 0UL ) {
// General case: dispatch on A and x (set up on lines not visible here), forwarding the scalar.
3064 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Chooses between the default and the BLAS kernel for the scaled subtraction assignment
// y -= scalar*A*x.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list and the second half of the condition are
// only partially visible in this excerpt.
3079 template<
typename VT1
3083 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// A matrix operand that is a computation but is not evaluated up front is one of the
// conditions that route to the default kernel.
3085 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3087 DVecScalarMultExpr::selectDefaultSubAssignKernel( y, A, x, scalar );
3089 DVecScalarMultExpr::selectBlasSubAssignKernel( y, A, x, scalar );
// Non-vectorized default kernel for the scaled subtraction assignment; selected when
// UseVectorizedDefaultKernel does not hold for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
3107 template<
typename VT1
3111 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3112 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// No hand-written loop here: the scaled product is rebuilt as an expression and handed
// to y's own subAssign.
3114 y.subAssign( A * x * scalar );
// Intrinsics-based default kernel for the scaled subtraction assignment; selected when
// UseVectorizedDefaultKernel holds for the operand and scalar types.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the loops over the row index i, the definitions of x1 and factor and
// most stores are not visible in this excerpt; factor is presumably the scalar in
// intrinsic form -- confirm.
3132 template<
typename VT1
3136 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3137 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3139 typedef IntrinsicTrait<ElementType> IT;
3141 const size_t M( A.rows() );
3142 const size_t N( A.columns() );
// Eight accumulators: 8*IT::size rows per pass. The products are summed (not subtracted)
// here; the subtraction happens at store time.
3149 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3150 for(
size_t j=0UL; j<N; ++j ) {
3152 xmm1 = xmm1 + A.load(i ,j) * x1;
3153 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3154 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3155 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3156 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3157 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3158 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3159 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
// The scaled accumulator is subtracted from the value currently stored in y.
3161 y.store( i , y.load(i ) - xmm1*factor );
// Four accumulators: 4*IT::size rows per pass.
3172 for(
size_t j=0UL; j<N; ++j ) {
3174 xmm1 = xmm1 + A.load(i ,j) * x1;
3175 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3176 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3177 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3179 y.store( i , y.load(i ) - xmm1*factor );
// Three accumulators: 3*IT::size rows per pass.
3186 for(
size_t j=0UL; j<N; ++j ) {
3188 xmm1 = xmm1 + A.load(i ,j) * x1;
3189 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3190 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3192 y.store( i , y.load(i ) - xmm1*factor );
// Two accumulators: 2*IT::size rows per pass.
3198 for(
size_t j=0UL; j<N; ++j ) {
3200 xmm1 = xmm1 + A.load(i ,j) * x1;
3201 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3203 y.store( i , y.load(i ) - xmm1*factor );
// Single accumulator: IT::size rows per pass; x[j] is passed through set() inline.
3208 for(
size_t j=0UL; j<N; ++j ) {
3209 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3211 y.store( i, y.load(i) - xmm1*factor );
// Fallback of the BLAS overload set for the scaled subtraction assignment: chosen when
// UseDefaultKernel holds and simply forwards to the default subtraction kernel.
//   y - target vector, A - matrix operand, x - vector operand, scalar - scaling factor
// NOTE(review): the template parameter list is only partially visible in this excerpt.
3230 template<
typename VT1
3234 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3235 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3237 selectDefaultSubAssignKernel( y, A, x, scalar );
3256 template<
typename VT1
3260 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3261 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3263 sgemv( y, A, x, -scalar, 1.0F );
3283 template<
typename VT1
3287 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3288 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3290 dgemv( y, A, x, -scalar, 1.0 );
3310 template<
typename VT1
3314 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3315 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3317 cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3337 template<
typename VT1
3341 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3342 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3344 zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3365 template<
typename VT1 >
3366 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3399 template<
typename VT1 >
3400 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3401 smpAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3407 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3408 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3410 if( left.rows() == 0UL ) {
3413 else if( left.columns() == 0UL ) {
3444 template<
typename VT1 >
3445 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3446 smpAssign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3475 template<
typename VT1 >
3476 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3477 smpAddAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3483 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3484 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3486 if( left.rows() == 0UL || left.columns() == 0UL ) {
3520 template<
typename VT1 >
3521 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3522 smpSubAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3528 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3529 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3531 if( left.rows() == 0UL || left.columns() == 0UL ) {
3566 template<
typename VT1 >
3567 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3568 smpMultAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3641 template<
typename T1
3643 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
3649 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
3666 template<
typename MT,
typename VT >
3684 template<
typename MT,
typename VT,
bool AF >
3689 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type, VT >::Type Type;
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4838
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:264
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:114
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:258
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:288
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
Header file for the DenseVector base class.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:251
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:404
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:126
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:259
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:261
Header file for the multiplication trait.
Header file for the IsDouble type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:405
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:113
Header file for the IsBlasCompatible type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:340
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDMatDVecMultExpr.h:406
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:254
Constraints on the storage order of matrix types.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:255
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:267
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:330
Header file for the IsNumeric type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:374
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:115
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
System settings for the BLAS mode.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:250
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:253
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:362
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold. This threshold specifies when a c...
Definition: Thresholds.h:345
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:116
Header file for all intrinsic functionality.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:252
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:303
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:249
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
Header file for basic type definitions.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:118
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:394
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:384
Constraint on the data type.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:350
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:117
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849