35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
106 template<
typename VT
108 class TDVecDMatMultExpr :
public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
109 ,
private TVecMatMultExpr
110 ,
private Computation
// Compile-time switch used as an EnableIf condition by the friend assignment
// overloads declared further down in this file. Its value is nonzero as soon
// as either operand is flagged for evaluation (evaluateVector or
// evaluateMatrix). The template parameter T1 does not take part in the
// expression visible here.
139 template<
typename T1 >
140 struct UseSMPAssign {
141 enum { value = ( evaluateVector || evaluateMatrix ) };
152 template<
typename T1,
typename T2,
typename T3 >
153 struct UseSinglePrecisionKernel {
155 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
156 IsFloat<typename T1::ElementType>::value &&
157 IsFloat<typename T2::ElementType>::value &&
158 IsFloat<typename T3::ElementType>::value };
169 template<
typename T1,
typename T2,
typename T3 >
170 struct UseDoublePrecisionKernel {
172 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
173 IsDouble<typename T1::ElementType>::value &&
174 IsDouble<typename T2::ElementType>::value &&
175 IsDouble<typename T3::ElementType>::value };
186 template<
typename T1,
typename T2,
typename T3 >
187 struct UseSinglePrecisionComplexKernel {
188 typedef complex<float> Type;
190 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191 IsSame<typename T1::ElementType,Type>::value &&
192 IsSame<typename T2::ElementType,Type>::value &&
193 IsSame<typename T3::ElementType,Type>::value };
204 template<
typename T1,
typename T2,
typename T3 >
205 struct UseDoublePrecisionComplexKernel {
206 typedef complex<double> Type;
208 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209 IsSame<typename T1::ElementType,Type>::value &&
210 IsSame<typename T2::ElementType,Type>::value &&
211 IsSame<typename T3::ElementType,Type>::value };
// Selects the non-BLAS fallback. value is nonzero when BLAZE_BLAS_MODE is
// disabled, or when none of the four BLAS kernel traits (single, double,
// single complex, double complex) accepts the combination <T1,T2,T3>.
// It is the EnableIf condition of the selectBlas*Kernel overloads that simply
// forward to the matching selectDefault*Kernel function.
221 template<
typename T1,
typename T2,
typename T3 >
222 struct UseDefaultKernel {
223 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
224 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
225 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
226 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Decides between the two selectDefault*Kernel overloads (the EnableIf
// variant versus the DisableIf variant). value is nonzero only if
//  - T1, T2 and T3 are all vectorizable,
//  - T2 and T3 have the same ElementType as T1, and
//  - IntrinsicTrait of that element type reports both addition and
//    multiplication.
237 template<
typename T1,
typename T2,
typename T3 >
238 struct UseVectorizedDefaultKernel {
239 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
240 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
241 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
242 IntrinsicTrait<typename T1::ElementType>::addition &&
243 IntrinsicTrait<typename T1::ElementType>::multiplication };
273 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
279 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
280 !evaluateMatrix && MT::smpAssignable };
309 if(
mat_.rows() != 0UL ) {
311 for(
size_t j=1UL; j<
end_; j+=2UL ) {
314 if( end_ < mat_.rows() ) {
332 return mat_.columns();
362 template<
typename T >
364 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
374 template<
typename T >
376 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
386 return vec_.isAligned() &&
mat_.isAligned();
// Wrapper around cblas_sgemv for single precision operands.
//  y      target vector, accessed through y.data() with stride 1
//  x      vector operand, accessed through x.data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha  scaling factor handed to BLAS (the call sites in this class pass
//         1.0F or -1.0F)
//  beta   scaling factor handed to BLAS (the call sites in this class pass
//         0.0F for assignment and 1.0F for add/sub assignment)
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are size values that are narrowed to int through
// boost::numeric_cast before being passed on.
427 template<
typename VT1
430 static inline void sgemv( VT1& y,
const VT2& x,
const MT1& A,
float alpha,
float beta )
432 using boost::numeric_cast;
438 const int M ( numeric_cast<int>( A.rows() ) );
439 const int N ( numeric_cast<int>( A.columns() ) );
440 const int lda( numeric_cast<int>( A.spacing() ) );
442 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, alpha,
443 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_dgemv for double precision operands.
//  y      target vector, accessed through y.data() with stride 1
//  x      vector operand, accessed through x.data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha  scaling factor handed to BLAS (the call sites in this class pass
//         1.0 or -1.0)
//  beta   scaling factor handed to BLAS (the call sites in this class pass
//         0.0 for assignment and 1.0 for add/sub assignment)
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
466 template<
typename VT1
469 static inline void dgemv( VT1& y,
const VT2& x,
const MT1& A,
double alpha,
double beta )
471 using boost::numeric_cast;
477 const int M ( numeric_cast<int>( A.rows() ) );
478 const int N ( numeric_cast<int>( A.columns() ) );
479 const int lda( numeric_cast<int>( A.spacing() ) );
481 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, alpha,
482 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_cgemv for single precision complex operands.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha, beta  complex scaling factors. They are taken by value and passed
//         to BLAS by address (&alpha, &beta), so their type has to match the
//         precision of the routine being called.
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
505 template<
typename VT1
508 static inline void cgemv( VT1& y,
const VT2& x,
const MT1& A,
509 complex<float> alpha, complex<float> beta )
511 using boost::numeric_cast;
520 const int M ( numeric_cast<int>( A.rows() ) );
521 const int N ( numeric_cast<int>( A.columns() ) );
522 const int lda( numeric_cast<int>( A.spacing() ) );
524 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
525 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Wrapper around cblas_zgemv for double precision complex operands.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha, beta  complex scaling factors, taken by value and passed to BLAS
//         by address (&alpha, &beta).
// Both factors must be complex<double>: the routine reads them through the
// pointers it is given, so a complex<float> beta would expose an object of
// half the expected size, and the complex<double> beta arguments supplied by
// the zgemv call sites in this class would not convert implicitly to
// complex<float>.
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
548 template<
typename VT1
551 static inline void zgemv( VT1& y,
const VT2& x,
const MT1& A,
552 complex<double> alpha, complex<double> beta )
554 using boost::numeric_cast;
563 const int M ( numeric_cast<int>( A.rows() ) );
564 const int N ( numeric_cast<int>( A.columns() ) );
565 const int lda( numeric_cast<int>( A.spacing() ) );
567 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
568 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
587 template<
typename VT1 >
594 if( rhs.mat_.rows() == 0UL ) {
598 else if( rhs.mat_.columns() == 0UL ) {
610 TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
626 template<
typename VT1
629 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
631 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
633 TDVecDMatMultExpr::selectDefaultAssignKernel( y, x, A );
635 TDVecDMatMultExpr::selectBlasAssignKernel( y, x, A );
654 template<
typename VT1
657 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
658 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
660 const size_t M( A.rows() );
661 const size_t N( A.columns() );
664 const size_t jend( N &
size_t(-2) );
666 for(
size_t j=0UL; j<N; ++j ) {
667 y[j] = x[0UL] * A(0UL,j);
669 for(
size_t i=1UL; i<M; ++i ) {
670 for(
size_t j=0UL; j<jend; j+=2UL ) {
671 y[j ] += x[i] * A(i,j );
672 y[j+1UL] += x[i] * A(i,j+1UL);
675 y[jend] += x[i] * A(i,jend);
696 template<
typename VT1
699 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
700 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
702 typedef IntrinsicTrait<ElementType> IT;
704 const size_t M( A.rows() );
705 const size_t N( A.columns() );
710 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
711 for(
size_t i=0UL; i<M; ++i ) {
713 xmm1 = xmm1 + x1 * A.load(i,j );
714 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
715 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
716 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
717 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
718 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
719 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
720 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
733 for(
size_t i=0UL; i<M; ++i ) {
735 xmm1 = xmm1 + x1 * A.load(i,j );
736 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
737 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
738 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
747 for(
size_t i=0UL; i<M; ++i ) {
749 xmm1 = xmm1 + x1 * A.load(i,j );
750 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
751 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
759 for(
size_t i=0UL; i<M; ++i ) {
761 xmm1 = xmm1 + x1 * A.load(i,j );
762 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
769 for(
size_t i=0UL; i<M; ++i ) {
770 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
792 template<
typename VT1
795 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
796 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
798 selectDefaultAssignKernel( y, x, A );
818 template<
typename VT1
821 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
822 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
824 sgemv( y, x, A, 1.0F, 0.0F );
845 template<
typename VT1
848 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
849 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
851 dgemv( y, x, A, 1.0, 0.0 );
872 template<
typename VT1
875 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
876 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
878 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
899 template<
typename VT1
902 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
903 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
905 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
924 template<
typename VT1 >
954 template<
typename VT1 >
961 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
973 TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
989 template<
typename VT1
992 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
994 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
996 TDVecDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
998 TDVecDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
1017 template<
typename VT1
1020 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1021 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1023 const size_t M( A.rows() );
1024 const size_t N( A.columns() );
1027 const size_t jend( N &
size_t(-2) );
1029 for(
size_t i=0UL; i<M; ++i ) {
1030 for(
size_t j=0UL; j<jend; j+=2UL ) {
1031 y[j ] += x[i] * A(i,j );
1032 y[j+1UL] += x[i] * A(i,j+1UL);
1035 y[jend] += x[i] * A(i,jend);
1056 template<
typename VT1
1059 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1060 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1062 typedef IntrinsicTrait<ElementType> IT;
1064 const size_t M( A.rows() );
1065 const size_t N( A.columns() );
1078 for(
size_t i=0UL; i<M; ++i ) {
1080 xmm1 = xmm1 + x1 * A.load(i,j );
1081 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1082 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1083 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1084 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1085 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1086 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1087 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
1089 y.store( j , xmm1 );
1103 for(
size_t i=0UL; i<M; ++i ) {
1105 xmm1 = xmm1 + x1 * A.load(i,j );
1106 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1107 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1108 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1110 y.store( j , xmm1 );
1119 for(
size_t i=0UL; i<M; ++i ) {
1121 xmm1 = xmm1 + x1 * A.load(i,j );
1122 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1123 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1125 y.store( j , xmm1 );
1132 for(
size_t i=0UL; i<M; ++i ) {
1134 xmm1 = xmm1 + x1 * A.load(i,j );
1135 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
1137 y.store( j , xmm1 );
1142 for(
size_t i=0UL; i<M; ++i ) {
1143 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
1165 template<
typename VT1
1168 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1169 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1171 selectDefaultAddAssignKernel( y, x, A );
1191 template<
typename VT1
1194 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1195 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1197 sgemv( y, x, A, 1.0F, 1.0F );
1218 template<
typename VT1
1221 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1222 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1224 dgemv( y, x, A, 1.0, 1.0 );
1245 template<
typename VT1
1248 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1249 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1251 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1272 template<
typename VT1
1275 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1276 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1278 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1301 template<
typename VT1 >
1308 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1320 TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1336 template<
typename VT1
1339 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1341 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1343 TDVecDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1345 TDVecDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
1364 template<
typename VT1
1367 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1368 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1370 const size_t M( A.rows() );
1371 const size_t N( A.columns() );
1374 const size_t jend( N &
size_t(-2) );
1376 for(
size_t i=0UL; i<M; ++i ) {
1377 for(
size_t j=0UL; j<jend; j+=2UL ) {
1378 y[j ] -= x[i] * A(i,j );
1379 y[j+1UL] -= x[i] * A(i,j+1UL);
1382 y[jend] -= x[i] * A(i,jend);
1403 template<
typename VT1
1406 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1407 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1409 typedef IntrinsicTrait<ElementType> IT;
1411 const size_t M( A.rows() );
1412 const size_t N( A.columns() );
1425 for(
size_t i=0UL; i<M; ++i ) {
1427 xmm1 = xmm1 - x1 * A.load(i,j );
1428 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1429 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1430 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1431 xmm5 = xmm5 - x1 * A.load(i,j+
IT::size*4UL);
1432 xmm6 = xmm6 - x1 * A.load(i,j+
IT::size*5UL);
1433 xmm7 = xmm7 - x1 * A.load(i,j+
IT::size*6UL);
1434 xmm8 = xmm8 - x1 * A.load(i,j+
IT::size*7UL);
1436 y.store( j , xmm1 );
1450 for(
size_t i=0UL; i<M; ++i ) {
1452 xmm1 = xmm1 - x1 * A.load(i,j );
1453 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1454 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1455 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1457 y.store( j , xmm1 );
1466 for(
size_t i=0UL; i<M; ++i ) {
1468 xmm1 = xmm1 - x1 * A.load(i,j );
1469 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1470 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1472 y.store( j , xmm1 );
1479 for(
size_t i=0UL; i<M; ++i ) {
1481 xmm1 = xmm1 - x1 * A.load(i,j );
1482 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size);
1484 y.store( j , xmm1 );
1489 for(
size_t i=0UL; i<M; ++i ) {
1490 xmm1 = xmm1 -
set( x[i] ) * A.load(i,j);
1512 template<
typename VT1
1515 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1516 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1518 selectDefaultSubAssignKernel( y, x, A );
1538 template<
typename VT1
1541 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1542 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1544 sgemv( y, x, A, -1.0F, 1.0F );
1565 template<
typename VT1
1568 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1569 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1571 dgemv( y, x, A, -1.0, 1.0 );
1592 template<
typename VT1
1595 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1596 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1598 cgemv( y, x, A, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1619 template<
typename VT1
1622 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1623 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1625 zgemv( y, x, A, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1648 template<
typename VT1 >
1684 template<
typename VT1 >
1685 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1692 if( rhs.mat_.rows() == 0UL ) {
1696 else if( rhs.mat_.columns() == 0UL ) {
1728 template<
typename VT1 >
1729 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1761 template<
typename VT1 >
1762 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1769 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1805 template<
typename VT1 >
1806 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1813 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1849 template<
typename VT1 >
1850 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1900 template<
typename VT
1904 :
public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1905 ,
private VecScalarMultExpr
1906 ,
private Computation
1910 typedef TDVecDMatMultExpr<VT,MT> VMM;
1922 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1927 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1928 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Compile-time switch for the scaled expression. Its value is nonzero as soon
// as either operand of the wrapped vector/matrix product is flagged for
// evaluation (see the evaluateVector and evaluateMatrix enums of this class).
// The template parameter T1 does not take part in the expression visible
// here.
1936 template<
typename T1 >
1937 struct UseSMPAssign {
1938 enum { value = ( evaluateVector || evaluateMatrix ) };
1947 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1948 struct UseSinglePrecisionKernel {
1950 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1951 IsFloat<typename T1::ElementType>::value &&
1952 IsFloat<typename T2::ElementType>::value &&
1953 IsFloat<typename T3::ElementType>::value &&
1954 !IsComplex<T4>::value };
1963 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1964 struct UseDoublePrecisionKernel {
1966 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1967 IsDouble<typename T1::ElementType>::value &&
1968 IsDouble<typename T2::ElementType>::value &&
1969 IsDouble<typename T3::ElementType>::value &&
1970 !IsComplex<T4>::value };
1979 template<
typename T1,
typename T2,
typename T3 >
1980 struct UseSinglePrecisionComplexKernel {
1981 typedef complex<float> Type;
1983 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1984 IsSame<typename T1::ElementType,Type>::value &&
1985 IsSame<typename T2::ElementType,Type>::value &&
1986 IsSame<typename T3::ElementType,Type>::value };
1995 template<
typename T1,
typename T2,
typename T3 >
1996 struct UseDoublePrecisionComplexKernel {
1997 typedef complex<double> Type;
1999 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2000 IsSame<typename T1::ElementType,Type>::value &&
2001 IsSame<typename T2::ElementType,Type>::value &&
2002 IsSame<typename T3::ElementType,Type>::value };
// Selects the non-BLAS fallback for the scaled expression. value is nonzero
// when BLAZE_BLAS_MODE is disabled, or when none of the four BLAS kernel
// traits applies. The scalar type T4 is only forwarded to the two real-valued
// traits; the two complex traits are queried with <T1,T2,T3> alone.
2010 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2011 struct UseDefaultKernel {
2012 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2013 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2014 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2015 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Decides between the two selectDefault*Kernel overloads of the scaled
// expression (the EnableIf variant versus the DisableIf variant). value is
// nonzero only if
//  - T1, T2 and T3 are all vectorizable,
//  - T2 and T3 have the same ElementType as T1,
//  - the scalar type T4 is that same element type, and
//  - IntrinsicTrait of that element type reports both addition and
//    multiplication.
2024 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2025 struct UseVectorizedDefaultKernel {
2026 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2027 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2028 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2029 IsSame<typename T1::ElementType,T4>::value &&
2030 IntrinsicTrait<typename T1::ElementType>::addition &&
2031 IntrinsicTrait<typename T1::ElementType>::multiplication };
2037 typedef DVecScalarMultExpr<VMM,ST,true>
This;
2038 typedef typename MultTrait<RES,ST>::Type
ResultType;
2041 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2046 typedef const TDVecDMatMultExpr<VT,MT>
LeftOperand;
2052 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
LT;
2055 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
RT;
2060 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
2061 IsSame<VET,MET>::value &&
2062 IsSame<VET,ST>::value &&
2063 IntrinsicTrait<VET>::addition &&
2064 IntrinsicTrait<VET>::multiplication };
2067 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2068 !evaluateMatrix && MT::smpAssignable };
2077 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
2091 return vector_[index] * scalar_;
2100 inline size_t size()
const {
2101 return vector_.size();
2131 template<
typename T >
2132 inline bool canAlias(
const T* alias )
const {
2133 return vector_.canAlias( alias );
2143 template<
typename T >
2144 inline bool isAliased(
const T* alias )
const {
2145 return vector_.isAliased( alias );
2155 return vector_.isAligned();
2165 typename VMM::RightOperand A( vector_.rightOperand() );
2167 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Wrapper around cblas_sgemv for single precision operands of the scaled
// expression.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha  scaling factor handed to BLAS (the call sites in this class pass
//         the expression's scalar or its negation)
//  beta   scaling factor handed to BLAS (the call sites in this class pass
//         0.0F for assignment and 1.0F for add/sub assignment)
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
2195 template<
typename VT1
2198 static inline void sgemv( VT1& y,
const VT2& x,
const MT1& A,
float alpha,
float beta )
2200 using boost::numeric_cast;
2206 const int M ( numeric_cast<int>( A.rows() ) );
2207 const int N ( numeric_cast<int>( A.columns() ) );
2208 const int lda( numeric_cast<int>( A.spacing() ) );
2210 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, alpha,
2211 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_dgemv for double precision operands of the scaled
// expression.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha  scaling factor handed to BLAS (the call sites visible in this
//         class pass the expression's scalar)
//  beta   scaling factor handed to BLAS (the call sites visible in this
//         class pass 0.0 for assignment and 1.0 for add assignment)
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
2232 template<
typename VT1
2235 static inline void dgemv( VT1& y,
const VT2& x,
const MT1& A,
double alpha,
double beta )
2237 using boost::numeric_cast;
2243 const int M ( numeric_cast<int>( A.rows() ) );
2244 const int N ( numeric_cast<int>( A.columns() ) );
2245 const int lda( numeric_cast<int>( A.spacing() ) );
2247 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, alpha,
2248 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_cgemv for single precision complex operands of the
// scaled expression.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha, beta  complex scaling factors. They are taken by value and passed
//         to BLAS by address (&alpha, &beta), so their type has to match the
//         precision of the routine being called.
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
2269 template<
typename VT1
2272 static inline void cgemv( VT1& y,
const VT2& x,
const MT1& A,
2273 complex<float> alpha, complex<float> beta )
2275 using boost::numeric_cast;
2284 const int M ( numeric_cast<int>( A.rows() ) );
2285 const int N ( numeric_cast<int>( A.columns() ) );
2286 const int lda( numeric_cast<int>( A.spacing() ) );
2288 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2289 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Wrapper around cblas_zgemv for double precision complex operands of the
// scaled expression.
//  y, x   target and operand vectors, accessed through data() with stride 1
//  A      matrix operand, accessed through A.data(); A.spacing() is used as
//         the leading dimension
//  alpha, beta  complex scaling factors, taken by value and passed to BLAS
//         by address (&alpha, &beta).
// Both factors must be complex<double>: the routine reads them through the
// pointers it is given, so a complex<float> beta would expose an object of
// half the expected size, and the complex<double> beta arguments supplied by
// the zgemv call sites in this class would not convert implicitly to
// complex<float>.
// The call uses CblasRowMajor together with CblasTrans. Rows, columns and
// spacing are narrowed to int through boost::numeric_cast.
2310 template<
typename VT1
2313 static inline void zgemv( VT1& y,
const VT2& x,
const MT1& A,
2314 complex<double> alpha, complex<double> beta )
2316 using boost::numeric_cast;
2325 const int M ( numeric_cast<int>( A.rows() ) );
2326 const int N ( numeric_cast<int>( A.columns() ) );
2327 const int lda( numeric_cast<int>( A.spacing() ) );
2329 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2330 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2347 template<
typename VT1 >
2348 friend inline void assign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2354 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2355 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2357 if( right.rows() == 0UL ) {
2361 else if( right.columns() == 0UL ) {
2373 DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
2388 template<
typename VT1
2392 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2394 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2396 DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2398 DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
2416 template<
typename VT1
2420 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2421 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2423 const size_t M( A.rows() );
2424 const size_t N( A.columns() );
2427 const size_t jend( N &
size_t(-2) );
2429 for(
size_t j=0UL; j<N; ++j ) {
2430 y[j] = x[0UL] * A(0UL,j);
2432 for(
size_t i=1UL; i<M; ++i ) {
2433 for(
size_t j=0UL; j<jend; j+=2UL ) {
2434 y[j ] += x[i] * A(i,j );
2435 y[j+1UL] += x[i] * A(i,j+1UL);
2438 y[jend] += x[i] * A(i,jend);
2441 for(
size_t j=0UL; j<N; ++j ) {
2461 template<
typename VT1
2465 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2466 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2468 typedef IntrinsicTrait<ElementType> IT;
2470 const size_t M( A.rows() );
2471 const size_t N( A.columns() );
2478 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2479 for(
size_t i=0UL; i<M; ++i ) {
2481 xmm1 = xmm1 + x1 * A.load(i,j );
2482 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2483 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2484 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2485 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
2486 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
2487 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
2488 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
2490 y.store( j , xmm1*factor );
2491 y.store( j+
IT::size , xmm2*factor );
2492 y.store( j+
IT::size*2UL, xmm3*factor );
2493 y.store( j+
IT::size*3UL, xmm4*factor );
2494 y.store( j+
IT::size*4UL, xmm5*factor );
2495 y.store( j+
IT::size*5UL, xmm6*factor );
2496 y.store( j+
IT::size*6UL, xmm7*factor );
2497 y.store( j+
IT::size*7UL, xmm8*factor );
2501 for(
size_t i=0UL; i<M; ++i ) {
2503 xmm1 = xmm1 + x1 * A.load(i,j );
2504 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2505 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2506 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2508 y.store( j , xmm1*factor );
2509 y.store( j+
IT::size , xmm2*factor );
2510 y.store( j+
IT::size*2UL, xmm3*factor );
2511 y.store( j+
IT::size*3UL, xmm4*factor );
2515 for(
size_t i=0UL; i<M; ++i ) {
2517 xmm1 = xmm1 + x1 * A.load(i,j );
2518 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2519 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2521 y.store( j , xmm1*factor );
2522 y.store( j+
IT::size , xmm2*factor );
2523 y.store( j+
IT::size*2UL, xmm3*factor );
2527 for(
size_t i=0UL; i<M; ++i ) {
2529 xmm1 = xmm1 + x1 * A.load(i,j );
2530 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
2532 y.store( j , xmm1*factor );
2533 y.store( j+
IT::size, xmm2*factor );
2537 for(
size_t i=0UL; i<M; ++i ) {
2538 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2540 y.store( j, xmm1*factor );
2558 template<
typename VT1
2562 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2563 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2565 selectDefaultAssignKernel( y, x, A, scalar );
2584 template<
typename VT1
2588 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2589 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2591 sgemv( y, x, A, scalar, 0.0F );
2611 template<
typename VT1
2615 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2616 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2618 dgemv( y, x, A, scalar, 0.0 );
2638 template<
typename VT1
2642 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2643 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2645 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
2665 template<
typename VT1
2669 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2670 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2672 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
2689 template<
typename VT1 >
2690 friend inline void assign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2717 template<
typename VT1 >
2718 friend inline void addAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2724 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2725 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2727 if( right.rows() == 0UL || right.columns() == 0UL ) {
2739 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2754 template<
typename VT1
2758 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2760 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2762 DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2764 DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
2782 template<
typename VT1
2786 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2787 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2789 y.addAssign( x * A * scalar );
2807 template<
typename VT1
2811 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2812 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2814 typedef IntrinsicTrait<ElementType> IT;
2816 const size_t M( A.rows() );
2817 const size_t N( A.columns() );
2824 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2825 for(
size_t i=0UL; i<M; ++i ) {
2827 xmm1 = xmm1 + x1 * A.load(i,j );
2828 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2829 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2830 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2831 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
2832 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
2833 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
2834 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
2836 y.store( j , y.load(j ) + xmm1*factor );
2847 for(
size_t i=0UL; i<M; ++i ) {
2849 xmm1 = xmm1 + x1 * A.load(i,j );
2850 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2851 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2852 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2854 y.store( j , y.load(j ) + xmm1*factor );
2861 for(
size_t i=0UL; i<M; ++i ) {
2863 xmm1 = xmm1 + x1 * A.load(i,j );
2864 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2865 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2867 y.store( j , y.load(j ) + xmm1*factor );
2873 for(
size_t i=0UL; i<M; ++i ) {
2875 xmm1 = xmm1 + x1 * A.load(i,j );
2876 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
2878 y.store( j , y.load(j ) + xmm1*factor );
2883 for(
size_t i=0UL; i<M; ++i ) {
2884 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2886 y.store( j, y.load(j) + xmm1*factor );
2905 template<
typename VT1
2909 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2910 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2912 selectDefaultAddAssignKernel( y, x, A, scalar );
2931 template<
typename VT1
2935 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2936 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2938 sgemv( y, x, A, scalar, 1.0F );
2958 template<
typename VT1
2962 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2963 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2965 dgemv( y, x, A, scalar, 1.0 );
2985 template<
typename VT1
2989 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2990 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2992 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3012 template<
typename VT1
3016 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3017 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3019 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3040 template<
typename VT1 >
3041 friend inline void subAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3047 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3048 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3050 if( right.rows() == 0UL || right.columns() == 0UL ) {
3062 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
3077 template<
typename VT1
3081 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3083 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3085 DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
3087 DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
3105 template<
typename VT1
3109 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3110 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3112 y.subAssign( x * A * scalar );
// Vectorized default subtraction assignment kernel.
// Selected via EnableIf when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
// The visible code accumulates products of vector elements with loaded matrix
// values into intrinsic accumulators (xmm1..xmm8) and then subtracts the
// accumulated value, multiplied by 'factor', from the loaded target values.
// NOTE(review): this excerpt omits many lines: the outer loops over the column
// index 'j', the definitions of 'x1' and 'factor', the closing braces, and the
// stores for the accumulators xmm2..xmm8. Only what is visible is described.
3130 template<
typename VT1
3134 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3135 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3137 typedef IntrinsicTrait<ElementType> IT;
// M = number of rows of A (bound of the inner loops), N = number of columns.
3139 const size_t M( A.rows() );
3140 const size_t N( A.columns() );
3147 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
// Section using eight accumulators: eight loads per row at column offsets
// j, j+IT::size, ..., j+IT::size*7.
3148 for(
size_t i=0UL; i<M; ++i ) {
3150 xmm1 = xmm1 + x1 * A.load(i,j );
3151 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3152 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3153 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3154 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3155 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3156 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3157 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Subtract the scaled accumulator from the target values at position j.
3159 y.store( j , y.load(j ) - xmm1*factor );
// Section using four accumulators.
3170 for(
size_t i=0UL; i<M; ++i ) {
3172 xmm1 = xmm1 + x1 * A.load(i,j );
3173 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3174 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3175 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3177 y.store( j , y.load(j ) - xmm1*factor );
// Section using three accumulators.
3184 for(
size_t i=0UL; i<M; ++i ) {
3186 xmm1 = xmm1 + x1 * A.load(i,j );
3187 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3188 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3190 y.store( j , y.load(j ) - xmm1*factor );
// Section using two accumulators.
3196 for(
size_t i=0UL; i<M; ++i ) {
3198 xmm1 = xmm1 + x1 * A.load(i,j );
3199 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3201 y.store( j , y.load(j ) - xmm1*factor );
// Section using a single accumulator; here the vector element x[i] is
// broadcast via set() directly inside the accumulation.
3206 for(
size_t i=0UL; i<M; ++i ) {
3207 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3209 y.store( j, y.load(j) - xmm1*factor );
// BLAS subtraction assignment kernel, fallback variant.
// Enabled when UseDefaultKernel<VT1,VT2,MT1,ST2> holds; in that case no BLAS
// routine is called and the work is forwarded to the default kernel.
// The remaining template parameters are not visible in this excerpt.
3228 template<
typename VT1
3232 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3233 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3235 selectDefaultSubAssignKernel( y, x, A, scalar );
// BLAS subtraction assignment kernel, single precision variant.
// Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// The remaining template parameters are not visible in this excerpt.
3254 template<
typename VT1
3258 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3259 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// The subtraction is expressed by passing the negated scalar; the trailing
// 1.0F is presumably the beta factor that keeps the current content of y --
// TODO confirm against the sgemv wrapper.
3261 sgemv( y, x, A, -scalar, 1.0F );
// BLAS subtraction assignment kernel, double precision variant.
// Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> holds.
// The remaining template parameters are not visible in this excerpt.
3281 template<
typename VT1
3285 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3286 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// The subtraction is expressed by passing the negated scalar; the trailing
// 1.0 is presumably the beta factor that keeps the current content of y --
// TODO confirm against the dgemv wrapper.
3288 dgemv( y, x, A, -scalar, 1.0 );
// BLAS subtraction assignment kernel, single precision complex variant.
// Enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> holds; the
// trait is instantiated without the scalar type ST2.
// The remaining template parameters are not visible in this excerpt.
3308 template<
typename VT1
3312 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3313 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// The negated scalar is wrapped into a complex<float> with zero imaginary
// part; the last argument is the complex value (1,0).
3315 cgemv( y, x, A, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS subtraction assignment kernel, double precision complex variant.
// Enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> holds; the
// trait is instantiated without the scalar type ST2.
// The remaining template parameters are not visible in this excerpt.
3335 template<
typename VT1
3339 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3340 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// The negated scalar is wrapped into a complex<double> with zero imaginary
// part; the last argument is the complex value (1,0).
3342 zgemv( y, x, A, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Multiplication assignment of a scaled multiplication expression
// (DVecScalarMultExpr) to a dense row vector.
//   lhs - target dense vector
//   rhs - expression used as right-hand side of the multiplication assignment
// NOTE(review): the function body is not visible in this excerpt.
3363 template<
typename VT1 >
3364 friend inline void multAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// SMP assignment of a scaled multiplication expression (DVecScalarMultExpr)
// to a dense row vector. Participates in overload resolution only when
// UseSMPAssign<VT1> holds (EnableIf).
// NOTE(review): the branch bodies and the remainder of the function are
// missing from this excerpt.
3397 template<
typename VT1 >
3398 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3399 smpAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the two operands of the wrapped multiplication expression.
3405 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3406 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// Unlike the add/sub variants, the empty-rows and empty-columns cases are
// handled in two separate branches (their bodies are not visible here).
3408 if( right.rows() == 0UL ) {
3412 else if( right.columns() == 0UL ) {
// SMP assignment of a scaled multiplication expression (DVecScalarMultExpr)
// to a sparse row vector. Participates in overload resolution only when
// UseSMPAssign<VT1> holds (EnableIf).
// NOTE(review): the function body is not visible in this excerpt.
3442 template<
typename VT1 >
3443 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3444 smpAssign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// SMP addition assignment of a scaled multiplication expression
// (DVecScalarMultExpr) to a dense row vector. Participates in overload
// resolution only when UseSMPAssign<VT1> holds (EnableIf).
// NOTE(review): the branch body and the remainder of the function are missing
// from this excerpt.
3473 template<
typename VT1 >
3474 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3475 smpAddAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the two operands of the wrapped multiplication expression.
3481 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3482 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: matrix operand without rows or columns; presumably an early
// return since there is nothing to add -- branch body not visible.
3484 if( right.rows() == 0UL || right.columns() == 0UL ) {
// SMP subtraction assignment of a scaled multiplication expression
// (DVecScalarMultExpr) to a dense row vector. Participates in overload
// resolution only when UseSMPAssign<VT1> holds (EnableIf).
// NOTE(review): the branch body and the remainder of the function are missing
// from this excerpt.
3518 template<
typename VT1 >
3519 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3520 smpSubAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the two operands of the wrapped multiplication expression.
3526 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3527 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: matrix operand without rows or columns; presumably an early
// return since there is nothing to subtract -- branch body not visible.
3529 if( right.rows() == 0UL || right.columns() == 0UL ) {
// SMP multiplication assignment of a scaled multiplication expression
// (DVecScalarMultExpr) to a dense row vector. Participates in overload
// resolution only when UseSMPAssign<VT1> holds (EnableIf).
// NOTE(review): the function body is not visible in this excerpt.
3564 template<
typename VT1 >
3565 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3566 smpMultAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3639 template<
typename T1
3641 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
3646 if( (~vec).
size() != (~mat).
rows() )
3647 throw std::invalid_argument(
"Vector and matrix sizes do not match" );
3675 template<
typename T1
3678 inline const typename EnableIf< IsMatMatMultExpr<T2>,
typename MultExprTrait<T1,T2>::Type >::Type
3700 template<
typename MT,
typename VT >
3702 :
public Columns<MT>
3718 template<
typename VT,
typename MT,
bool AF >
3723 typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:385
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:115
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A=B*C$ ).
Definition: DMatDMatMultExpr.h:4838
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:255
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:119
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:395
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:351
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
Header file for the DenseVector base class.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:304
Header file for the VecScalarMultExpr base class.
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:265
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:250
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:259
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:259
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:252
Header file for the multiplication trait.
Header file for the IsDouble type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:254
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:363
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:289
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold. This threshold specifies when a dens...
Definition: Thresholds.h:368
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Constraint on the data type.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:375
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:331
Header file for the IsNumeric type trait.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:114
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:116
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:262
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:341
Constraint on the data type.
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:117
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:406
Constraint on the data type.
Expression object for transpose dense vector-dense matrix multiplications. The TDVecDMatMultExpr class...
Definition: Forward.h:133
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:253
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:268
Header file for all intrinsic functionality.
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecDMatMultExpr.h:407
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:118
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:256
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:405
Constraint on the data type.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:251
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849