35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
// Expression class TDVecTDMatMultExpr. It derives publicly from
// DenseVector< TDVecTDMatMultExpr<VT,MT>, true > and privately from the
// TVecMatMultExpr and Computation base classes.
// NOTE(review): the `true` flag presumably marks a transpose (row) vector -
// confirm against the DenseVector declaration.
105 template<
typename VT
107 class TDVecTDMatMultExpr :
public DenseVector< TDVecTDMatMultExpr<VT,MT>, true >
108 ,
private TVecMatMultExpr
109 ,
private Computation
// Compile-time switch used with EnableIf on the SMP assignment overloads:
// value is nonzero when evaluateVector or evaluateMatrix is set. The template
// parameter T1 does not take part in the condition.
138 template<
typename T1 >
139 struct UseSMPAssign {
140 enum { value = ( evaluateVector || evaluateMatrix ) };
// Condition for the single precision BLAS kernel: all three types must be
// vectorizable and IsFloat must hold for each of their element types.
151 template<
typename T1,
typename T2,
typename T3 >
152 struct UseSinglePrecisionKernel {
153 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
154 IsFloat<typename T1::ElementType>::value &&
155 IsFloat<typename T2::ElementType>::value &&
156 IsFloat<typename T3::ElementType>::value };
// Condition for the double precision BLAS kernel: all three types must be
// vectorizable and IsDouble must hold for each of their element types.
167 template<
typename T1,
typename T2,
typename T3 >
168 struct UseDoublePrecisionKernel {
169 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170 IsDouble<typename T1::ElementType>::value &&
171 IsDouble<typename T2::ElementType>::value &&
172 IsDouble<typename T3::ElementType>::value };
// Condition for the single precision complex BLAS kernel: all three types must
// be vectorizable and each element type must be exactly complex<float>.
183 template<
typename T1,
typename T2,
typename T3 >
184 struct UseSinglePrecisionComplexKernel {
185 typedef complex<float> Type;
186 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187 IsSame<typename T1::ElementType,Type>::value &&
188 IsSame<typename T2::ElementType,Type>::value &&
189 IsSame<typename T3::ElementType,Type>::value };
// Condition for the double precision complex BLAS kernel: all three types must
// be vectorizable and each element type must be exactly complex<double>.
200 template<
typename T1,
typename T2,
typename T3 >
201 struct UseDoublePrecisionComplexKernel {
202 typedef complex<double> Type;
203 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
204 IsSame<typename T1::ElementType,Type>::value &&
205 IsSame<typename T2::ElementType,Type>::value &&
206 IsSame<typename T3::ElementType,Type>::value };
// Condition for falling back to the default kernel: value is nonzero when
// BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel conditions
// (float, double, complex<float>, complex<double>) holds for T1, T2, T3.
216 template<
typename T1,
typename T2,
typename T3 >
217 struct UseDefaultKernel {
218 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
219 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
220 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
221 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Condition for the vectorized default kernel: all three types are
// vectorizable, share one element type, and IntrinsicTrait reports both
// addition and multiplication support for that element type.
232 template<
typename T1,
typename T2,
typename T3 >
233 struct UseVectorizedDefaultKernel {
234 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237 IntrinsicTrait<typename T1::ElementType>::addition &&
238 IntrinsicTrait<typename T1::ElementType>::multiplication };
// vectorizable: requires at least that both VT and MT are vectorizable (the
// expression continues after the trailing &&).
268 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
// smpAssignable: set only if neither evaluateVector nor evaluateMatrix is set
// and both VT and MT are themselves smpAssignable.
274 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
275 !evaluateMatrix && MT::smpAssignable };
// The computation below is only entered when the matrix operand has at least
// one row.
304 if(
mat_.rows() != 0UL ) {
// Index j starts at 1 and advances in steps of two while it is below end_.
306 for(
size_t j=1UL; j<
end_; j+=2UL ) {
// Extra branch taken when end_ is smaller than the number of matrix rows.
309 if( end_ < mat_.rows() ) {
// Returns the number of columns of the matrix operand.
327 return mat_.columns();
// Reports true if either the vector or the matrix operand reports being
// aliased with the given address.
357 template<
typename T >
359 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
// Reports true if either the vector or the matrix operand reports being
// aliased with the given address.
369 template<
typename T >
371 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
// True only if both the vector and the matrix operand report being aligned.
381 return vec_.isAligned() &&
mat_.isAligned();
// Wrapper around cblas_sgemv for float data.
// y is the target vector, x the vector operand, A the matrix operand; alpha
// and beta are handed to BLAS unchanged.
422 template<
typename VT1
425 static inline void sgemv( VT1& y,
const VT2& x,
const MT1& A,
float alpha,
float beta )
427 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
433 const int M ( numeric_cast<int>( A.rows() ) );
434 const int N ( numeric_cast<int>( A.columns() ) );
435 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y;
// per the GEMV definition this is y := alpha*A^T*x + beta*y.
437 cblas_sgemv( CblasColMajor, CblasTrans, M, N, alpha,
438 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_dgemv for double data.
// y is the target vector, x the vector operand, A the matrix operand; alpha
// and beta are handed to BLAS unchanged.
461 template<
typename VT1
464 static inline void dgemv( VT1& y,
const VT2& x,
const MT1& A,
double alpha,
double beta )
466 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
472 const int M ( numeric_cast<int>( A.rows() ) );
473 const int N ( numeric_cast<int>( A.columns() ) );
474 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y;
// per the GEMV definition this is y := alpha*A^T*x + beta*y.
476 cblas_dgemv( CblasColMajor, CblasTrans, M, N, alpha,
477 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_cgemv for complex<float> data.
// alpha and beta are taken by value and passed to BLAS by address.
500 template<
typename VT1
503 static inline void cgemv( VT1& y,
const VT2& x,
const MT1& A,
504 complex<float> alpha, complex<float> beta )
506 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
515 const int M ( numeric_cast<int>( A.rows() ) );
516 const int N ( numeric_cast<int>( A.columns() ) );
517 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y.
519 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
520 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Wrapper around cblas_zgemv for complex<double> data.
// alpha and beta are taken by value and passed to BLAS by address.
543 template<
typename VT1
546 static inline void zgemv( VT1& y,
const VT2& x,
const MT1& A,
547 complex<double> alpha, complex<double> beta )
549 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
558 const int M ( numeric_cast<int>( A.rows() ) );
559 const int N ( numeric_cast<int>( A.columns() ) );
560 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y.
562 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
563 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assignment entry point: a matrix operand without rows and one without
// columns each get their own branch; otherwise the work is handed to
// selectAssignKernel with the unwrapped target (~lhs) and the operands x and A.
582 template<
typename VT1 >
589 if( rhs.mat_.rows() == 0UL ) {
593 else if( rhs.mat_.columns() == 0UL ) {
605 TDVecTDMatMultExpr::selectAssignKernel( ~lhs, x, A );
// Chooses between the default and the BLAS-based assignment kernel.
// y is the target vector, x the vector operand, A the matrix operand.
621 template<
typename VT1
624 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// First visible part of the condition: MT is a computation that is not
// evaluated beforehand.
626 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
628 TDVecTDMatMultExpr::selectDefaultAssignKernel( y, x, A );
630 TDVecTDMatMultExpr::selectBlasAssignKernel( y, x, A );
// Default assignment kernel overload that takes part in overload resolution
// only when UseVectorizedDefaultKernel<VT1,VT2,MT1> does NOT hold (DisableIf).
649 template<
typename VT1
652 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
653 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// Vectorized default assignment kernel, enabled when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// Columns of A are handled in blocks of 8, 4, 3 and 2, followed by code for a
// single column. Every inner loop walks the M rows in steps of IT::size.
674 template<
typename VT1
677 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
678 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
680 typedef IntrinsicTrait<ElementType> IT;
682 const size_t M( A.rows() );
683 const size_t N( A.columns() );
// Eight columns per iteration, one IntrinsicType accumulator per column.
687 for( ; (j+8UL) <= N; j+=8UL ) {
688 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
689 for(
size_t i=0UL; i<M; i+=
IT::size ) {
691 xmm1 = xmm1 + x1 * A.load(i,j );
692 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
693 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
694 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
695 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
696 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
697 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
698 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
701 y[j+1UL] =
sum( xmm2 );
702 y[j+2UL] =
sum( xmm3 );
703 y[j+3UL] =
sum( xmm4 );
704 y[j+4UL] =
sum( xmm5 );
705 y[j+5UL] =
sum( xmm6 );
706 y[j+6UL] =
sum( xmm7 );
707 y[j+7UL] =
sum( xmm8 );
// Four columns per iteration.
709 for( ; (j+4UL) <= N; j+=4UL ) {
711 for(
size_t i=0UL; i<M; i+=
IT::size ) {
713 xmm1 = xmm1 + x1 * A.load(i,j );
714 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
715 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
716 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
719 y[j+1UL] =
sum( xmm2 );
720 y[j+2UL] =
sum( xmm3 );
721 y[j+3UL] =
sum( xmm4 );
// Three columns per iteration.
723 for( ; (j+3UL) <= N; j+=3UL ) {
725 for(
size_t i=0UL; i<M; i+=
IT::size ) {
727 xmm1 = xmm1 + x1 * A.load(i,j );
728 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
729 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
732 y[j+1UL] =
sum( xmm2 );
733 y[j+2UL] =
sum( xmm3 );
// Two columns per iteration.
735 for( ; (j+2UL) <= N; j+=2UL ) {
737 for(
size_t i=0UL; i<M; i+=
IT::size ) {
739 xmm1 = xmm1 + x1 * A.load(i,j );
740 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
743 y[j+1UL] =
sum( xmm2 );
// Single column: the vector chunk is loaded directly inside the product.
747 for(
size_t i=0UL; i<M; i+=
IT::size ) {
748 xmm1 = xmm1 + A.load(i,j) * x.load(i);
// BLAS selector fallback: when UseDefaultKernel<VT1,VT2,MT1> holds, the call is
// forwarded unchanged to selectDefaultAssignKernel.
770 template<
typename VT1
773 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
774 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
776 selectDefaultAssignKernel( y, x, A );
// BLAS selector for float operands (UseSinglePrecisionKernel): calls sgemv
// with alpha = 1 and beta = 0.
796 template<
typename VT1
799 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
800 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
802 sgemv( y, x, A, 1.0F, 0.0F );
// BLAS selector for double operands (UseDoublePrecisionKernel): calls dgemv
// with alpha = 1 and beta = 0.
823 template<
typename VT1
826 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
827 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
829 dgemv( y, x, A, 1.0, 0.0 );
// BLAS selector for complex<float> operands (UseSinglePrecisionComplexKernel):
// calls cgemv with alpha = (1,0) and beta = (0,0).
850 template<
typename VT1
853 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
854 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
856 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS selector for complex<double> operands (UseDoublePrecisionComplexKernel):
// calls zgemv with alpha = (1,0) and beta = (0,0).
877 template<
typename VT1
880 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
881 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
883 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
902 template<
typename VT1 >
// Addition assignment entry point: a matrix operand without rows or without
// columns is handled by a single shared branch; otherwise the work is handed
// to selectAddAssignKernel with the unwrapped target (~lhs) and x, A.
932 template<
typename VT1 >
939 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
951 TDVecTDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
// Chooses between the default and the BLAS-based addition assignment kernel.
// y is the target vector, x the vector operand, A the matrix operand.
967 template<
typename VT1
970 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// First visible part of the condition: MT is a computation that is not
// evaluated beforehand.
972 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
974 TDVecTDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
976 TDVecTDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
// Non-vectorized default addition assignment kernel (active when
// UseVectorizedDefaultKernel does not hold): delegates to the target's own
// addAssign with the product expression x * A.
995 template<
typename VT1
998 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
999 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1001 y.addAssign( x * A );
// Vectorized default addition assignment kernel, enabled when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// Columns of A are handled in blocks of 8, 4, 3 and 2, followed by code for a
// single column. Every inner loop walks the M rows in steps of IT::size, and
// each reduced accumulator is added to the matching element of y.
1020 template<
typename VT1
1023 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1024 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1026 typedef IntrinsicTrait<ElementType> IT;
1028 const size_t M( A.rows() );
1029 const size_t N( A.columns() );
// Eight columns per iteration, one IntrinsicType accumulator per column.
1033 for( ; (j+8UL) <= N; j+=8UL ) {
1034 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1035 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1037 xmm1 = xmm1 + x1 * A.load(i,j );
1038 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1039 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1040 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1041 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1042 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1043 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1044 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1046 y[j ] +=
sum( xmm1 );
1047 y[j+1UL] +=
sum( xmm2 );
1048 y[j+2UL] +=
sum( xmm3 );
1049 y[j+3UL] +=
sum( xmm4 );
1050 y[j+4UL] +=
sum( xmm5 );
1051 y[j+5UL] +=
sum( xmm6 );
1052 y[j+6UL] +=
sum( xmm7 );
1053 y[j+7UL] +=
sum( xmm8 );
// Four columns per iteration.
1055 for( ; (j+4UL) <= N; j+=4UL ) {
1057 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1059 xmm1 = xmm1 + x1 * A.load(i,j );
1060 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1061 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1062 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1064 y[j ] +=
sum( xmm1 );
1065 y[j+1UL] +=
sum( xmm2 );
1066 y[j+2UL] +=
sum( xmm3 );
1067 y[j+3UL] +=
sum( xmm4 );
// Three columns per iteration.
1069 for( ; (j+3UL) <= N; j+=3UL ) {
1071 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1073 xmm1 = xmm1 + x1 * A.load(i,j );
1074 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1075 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1077 y[j ] +=
sum( xmm1 );
1078 y[j+1UL] +=
sum( xmm2 );
1079 y[j+2UL] +=
sum( xmm3 );
// Two columns per iteration.
1081 for( ; (j+2UL) <= N; j+=2UL ) {
1083 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1085 xmm1 = xmm1 + x1 * A.load(i,j );
1086 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1088 y[j ] +=
sum( xmm1 );
1089 y[j+1UL] +=
sum( xmm2 );
// Single column: the vector chunk is loaded directly inside the product.
1093 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1094 xmm1 = xmm1 + A.load(i,j) * x.load(i);
1096 y[j] +=
sum( xmm1 );
// BLAS selector fallback: when UseDefaultKernel<VT1,VT2,MT1> holds, the call is
// forwarded unchanged to selectDefaultAddAssignKernel.
1116 template<
typename VT1
1119 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1120 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1122 selectDefaultAddAssignKernel( y, x, A );
// BLAS selector for float operands: calls sgemv with alpha = 1 and beta = 1,
// so the product is accumulated onto y.
1142 template<
typename VT1
1145 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1146 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1148 sgemv( y, x, A, 1.0F, 1.0F );
// BLAS selector for double operands: calls dgemv with alpha = 1 and beta = 1,
// so the product is accumulated onto y.
1169 template<
typename VT1
1172 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1173 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1175 dgemv( y, x, A, 1.0, 1.0 );
// BLAS selector for complex<float> operands: calls cgemv with alpha = (1,0)
// and beta = (1,0).
1196 template<
typename VT1
1199 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1200 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1202 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS selector for complex<double> operands: calls zgemv with alpha = (1,0)
// and beta = (1,0).
1223 template<
typename VT1
1226 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1227 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1229 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment entry point: a matrix operand without rows or without
// columns is handled by a single shared branch; otherwise the work is handed
// to selectSubAssignKernel with the unwrapped target (~lhs) and x, A.
1252 template<
typename VT1 >
1259 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1271 TDVecTDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
// Chooses between the default and the BLAS-based subtraction assignment kernel.
// y is the target vector, x the vector operand, A the matrix operand.
1287 template<
typename VT1
1290 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// First visible part of the condition: MT is a computation that is not
// evaluated beforehand.
1292 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1294 TDVecTDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1296 TDVecTDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
// Non-vectorized default subtraction assignment kernel (active when
// UseVectorizedDefaultKernel does not hold): delegates to the target's own
// subAssign with the product expression x * A.
1315 template<
typename VT1
1318 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1319 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1321 y.subAssign( x * A );
// Vectorized default subtraction assignment kernel, enabled when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// Columns of A are handled in blocks of 8, 4, 3 and 2, followed by code for a
// single column. Every inner loop walks the M rows in steps of IT::size, and
// each reduced accumulator is subtracted from the matching element of y.
1340 template<
typename VT1
1343 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1344 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1346 typedef IntrinsicTrait<ElementType> IT;
1348 const size_t M( A.rows() );
1349 const size_t N( A.columns() );
// Eight columns per iteration, one IntrinsicType accumulator per column.
1353 for( ; (j+8UL) <= N; j+=8UL ) {
1354 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1355 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1357 xmm1 = xmm1 + x1 * A.load(i,j );
1358 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1359 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1360 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1361 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
1362 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
1363 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
1364 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
1366 y[j ] -=
sum( xmm1 );
1367 y[j+1UL] -=
sum( xmm2 );
1368 y[j+2UL] -=
sum( xmm3 );
1369 y[j+3UL] -=
sum( xmm4 );
1370 y[j+4UL] -=
sum( xmm5 );
1371 y[j+5UL] -=
sum( xmm6 );
1372 y[j+6UL] -=
sum( xmm7 );
1373 y[j+7UL] -=
sum( xmm8 );
// Four columns per iteration.
1375 for( ; (j+4UL) <= N; j+=4UL ) {
1377 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1379 xmm1 = xmm1 + x1 * A.load(i,j );
1380 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1381 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1382 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
1384 y[j ] -=
sum( xmm1 );
1385 y[j+1UL] -=
sum( xmm2 );
1386 y[j+2UL] -=
sum( xmm3 );
1387 y[j+3UL] -=
sum( xmm4 );
// Three columns per iteration.
1389 for( ; (j+3UL) <= N; j+=3UL ) {
1391 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1393 xmm1 = xmm1 + x1 * A.load(i,j );
1394 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1395 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
1397 y[j ] -=
sum( xmm1 );
1398 y[j+1UL] -=
sum( xmm2 );
1399 y[j+2UL] -=
sum( xmm3 );
// Two columns per iteration.
1401 for( ; (j+2UL) <= N; j+=2UL ) {
1403 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1405 xmm1 = xmm1 + x1 * A.load(i,j );
1406 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
1408 y[j ] -=
sum( xmm1 );
1409 y[j+1UL] -=
sum( xmm2 );
// Single column: the vector chunk is loaded directly inside the product.
1413 for(
size_t i=0UL; i<M; i+=
IT::size ) {
1414 xmm1 = xmm1 + A.load(i,j) * x.load(i);
1416 y[j] -=
sum( xmm1 );
// BLAS selector fallback: when UseDefaultKernel<VT1,VT2,MT1> holds, the call is
// forwarded unchanged to selectDefaultSubAssignKernel.
1436 template<
typename VT1
1439 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1440 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1442 selectDefaultSubAssignKernel( y, x, A );
// BLAS selector for float operands: calls sgemv with alpha = -1 and beta = 1,
// so the product is subtracted from y.
1462 template<
typename VT1
1465 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1466 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1468 sgemv( y, x, A, -1.0F, 1.0F );
// BLAS selector for double operands: calls dgemv with alpha = -1 and beta = 1,
// so the product is subtracted from y.
1489 template<
typename VT1
1492 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1493 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1495 dgemv( y, x, A, -1.0, 1.0 );
// BLAS selector for complex<float> operands: calls cgemv with alpha = (-1,0)
// and beta = (1,0).
1516 template<
typename VT1
1519 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1520 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1522 cgemv( y, x, A, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS selector for complex<double> operands: calls zgemv with alpha = (-1,0)
// and beta = (1,0).
1543 template<
typename VT1
1546 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1547 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1549 zgemv( y, x, A, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1572 template<
typename VT1 >
// Friend overloads gated by UseSMPAssign: each declaration below is enabled
// only when UseSMPAssign<VT1> holds.
1608 template<
typename VT1 >
1609 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// A matrix operand without rows and one without columns get separate branches.
1616 if( rhs.mat_.rows() == 0UL ) {
1620 else if( rhs.mat_.columns() == 0UL ) {
1652 template<
typename VT1 >
1653 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1685 template<
typename VT1 >
1686 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Here both empty cases share one branch.
1693 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1729 template<
typename VT1 >
1730 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Here both empty cases share one branch.
1737 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1773 template<
typename VT1 >
1774 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
// Class header for the scaled product
// DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >. It derives
// publicly from the matching DenseVector base and privately from the
// VecScalarMultExpr and Computation base classes.
1824 template<
typename VT
1828 :
public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1829 ,
private VecScalarMultExpr
1830 ,
private Computation
// VMM: shorthand for the wrapped vector/matrix multiplication expression.
1834 typedef TDVecTDMatMultExpr<VT,MT> VMM;
// evaluateVector: set when VT is a computation or requires evaluation.
1846 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// evaluateMatrix: set when MT requires evaluation, or when MT is a computation
// whose element type MET equals VET and is BLAS compatible.
1851 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1852 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Compile-time switch for SMP assignment: value is nonzero when the target
// type T1 is smpAssignable and evaluateVector or evaluateMatrix is set.
1860 template<
typename T1 >
1861 struct UseSMPAssign {
1862 enum { value = T1::smpAssignable && ( evaluateVector || evaluateMatrix ) };
// Condition for the single precision BLAS kernel of the scaled product:
// T1, T2, T3 must be vectorizable with float element types (IsFloat), and T4
// (the scalar type at the call sites) must not be complex.
1871 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1872 struct UseSinglePrecisionKernel {
1873 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1874 IsFloat<typename T1::ElementType>::value &&
1875 IsFloat<typename T2::ElementType>::value &&
1876 IsFloat<typename T3::ElementType>::value &&
1877 !IsComplex<T4>::value };
// Condition for the double precision BLAS kernel of the scaled product:
// T1, T2, T3 must be vectorizable with double element types (IsDouble), and T4
// (the scalar type at the call sites) must not be complex.
1886 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1887 struct UseDoublePrecisionKernel {
1888 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1889 IsDouble<typename T1::ElementType>::value &&
1890 IsDouble<typename T2::ElementType>::value &&
1891 IsDouble<typename T3::ElementType>::value &&
1892 !IsComplex<T4>::value };
// Condition for the single precision complex BLAS kernel: all three types must
// be vectorizable and each element type must be exactly complex<float>.
1901 template<
typename T1,
typename T2,
typename T3 >
1902 struct UseSinglePrecisionComplexKernel {
1903 typedef complex<float> Type;
1904 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1905 IsSame<typename T1::ElementType,Type>::value &&
1906 IsSame<typename T2::ElementType,Type>::value &&
1907 IsSame<typename T3::ElementType,Type>::value };
// Condition for the double precision complex BLAS kernel: all three types must
// be vectorizable and each element type must be exactly complex<double>.
1916 template<
typename T1,
typename T2,
typename T3 >
1917 struct UseDoublePrecisionComplexKernel {
1918 typedef complex<double> Type;
1919 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1920 IsSame<typename T1::ElementType,Type>::value &&
1921 IsSame<typename T2::ElementType,Type>::value &&
1922 IsSame<typename T3::ElementType,Type>::value };
// Condition for falling back to the default kernel: value is nonzero when
// BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel conditions
// holds. Only the real-valued conditions take the scalar type T4 into account.
1930 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1931 struct UseDefaultKernel {
1932 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1933 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1934 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1935 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Condition for the vectorized default kernel of the scaled product: T1, T2,
// T3 are vectorizable and share one element type, T4 is that same type, and
// IntrinsicTrait reports addition and multiplication support for it.
1944 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1945 struct UseVectorizedDefaultKernel {
1946 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1947 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1948 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1949 IsSame<typename T1::ElementType,T4>::value &&
1950 IntrinsicTrait<typename T1::ElementType>::addition &&
1951 IntrinsicTrait<typename T1::ElementType>::multiplication };
// This: the type of this expression specialization.
1957 typedef DVecScalarMultExpr<VMM,ST,true>
This;
// ResultType: MultTrait result of combining RES with the scalar type ST.
1958 typedef typename MultTrait<RES,ST>::Type
ResultType;
// IntrinsicType: the IntrinsicTrait type associated with ElementType.
1961 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// LeftOperand: the wrapped vector/matrix multiplication expression (const).
1966 typedef const TDVecTDMatMultExpr<VT,MT>
LeftOperand;
// LT / RT: operand types chosen by SelectType from the evaluateVector /
// evaluateMatrix flags.
// NOTE(review): presumably the first alternative (const VRT / const MRT) is
// picked when the flag is set - confirm against SelectType.
1972 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
LT;
1975 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
RT;
// vectorizable: VT and MT are vectorizable, VET equals both MET and ST, and
// IntrinsicTrait reports addition and multiplication support for VET.
1980 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
1981 IsSame<VET,MET>::value &&
1982 IsSame<VET,ST>::value &&
1983 IntrinsicTrait<VET>::addition &&
1984 IntrinsicTrait<VET>::multiplication };
// smpAssignable: set only if neither evaluateVector nor evaluateMatrix is set
// and both VT and MT are themselves smpAssignable.
1987 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
1988 !evaluateMatrix && MT::smpAssignable };
// Constructor taking the vector/matrix product expression and the scalar
// factor; explicit, so it is not used for implicit conversions.
1997 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
// Element access: the requested element of the wrapped product times the scalar.
2011 return vector_[index] * scalar_;
// Returns the size of the wrapped product expression.
2020 inline size_t size()
const {
2021 return vector_.size();
// Forwards the alias query to the wrapped product expression (canAlias).
// alias: address to check against.
2051 template<
typename T >
2052 inline bool canAlias(
const T* alias )
const {
2053 return vector_.canAlias( alias );
// Forwards the alias query to the wrapped product expression (isAliased).
// alias: address to check against.
2063 template<
typename T >
2064 inline bool isAliased(
const T* alias )
const {
2065 return vector_.isAliased( alias );
// Alignment is reported by the wrapped product expression.
2075 return vector_.isAligned();
// Binds the right-hand (matrix) operand of the wrapped product as A.
2085 typename VMM::RightOperand A( vector_.rightOperand() );
// Part of a condition: MT is a computation that is not evaluated beforehand.
2087 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Wrapper around cblas_sgemv for float data.
// y is the target vector, x the vector operand, A the matrix operand; alpha
// and beta are handed to BLAS unchanged.
2115 template<
typename VT1
2118 static inline void sgemv( VT1& y,
const VT2& x,
const MT1& A,
float alpha,
float beta )
2120 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
2126 const int M ( numeric_cast<int>( A.rows() ) );
2127 const int N ( numeric_cast<int>( A.columns() ) );
2128 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y;
// per the GEMV definition this is y := alpha*A^T*x + beta*y.
2130 cblas_sgemv( CblasColMajor, CblasTrans, M, N, alpha,
2131 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_dgemv for double data.
// y is the target vector, x the vector operand, A the matrix operand; alpha
// and beta are handed to BLAS unchanged.
2152 template<
typename VT1
2155 static inline void dgemv( VT1& y,
const VT2& x,
const MT1& A,
double alpha,
double beta )
2157 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
2163 const int M ( numeric_cast<int>( A.rows() ) );
2164 const int N ( numeric_cast<int>( A.columns() ) );
2165 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y;
// per the GEMV definition this is y := alpha*A^T*x + beta*y.
2167 cblas_dgemv( CblasColMajor, CblasTrans, M, N, alpha,
2168 A.data(), lda, x.data(), 1, beta, y.data(), 1 );
// Wrapper around cblas_cgemv for complex<float> data.
// alpha and beta are taken by value and passed to BLAS by address.
2189 template<
typename VT1
2192 static inline void cgemv( VT1& y,
const VT2& x,
const MT1& A,
2193 complex<float> alpha, complex<float> beta )
2195 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
2204 const int M ( numeric_cast<int>( A.rows() ) );
2205 const int N ( numeric_cast<int>( A.columns() ) );
2206 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y.
2208 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2209 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Wrapper around cblas_zgemv for complex<double> data.
// alpha and beta are taken by value and passed to BLAS by address.
2230 template<
typename VT1
2233 static inline void zgemv( VT1& y,
const VT2& x,
const MT1& A,
2234 complex<double> alpha, complex<double> beta )
2236 using boost::numeric_cast;
// Rows, columns and spacing of A are converted to int through
// boost::numeric_cast (a range-checked conversion).
2245 const int M ( numeric_cast<int>( A.rows() ) );
2246 const int N ( numeric_cast<int>( A.columns() ) );
2247 const int lda( numeric_cast<int>( A.spacing() ) );
// Column-major storage with the transpose flag, unit stride for x and y.
2249 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2250 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assignment of the scaled product to a dense vector.
// lhs: target dense vector; rhs: the scaled product expression.
2267 template<
typename VT1
2269 friend inline void assign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the vector (left) and matrix (right) operands of the wrapped product.
2275 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2276 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix operand without rows and one without columns get separate branches.
2278 if( right.rows() == 0UL ) {
2282 else if( right.columns() == 0UL ) {
// Regular case: dispatch to the kernel selection, passing the scalar along.
2294 DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Chooses between the default and the BLAS-based assignment kernel for the
// scaled product. y is the target, x and A the operands, scalar the factor.
2309 template<
typename VT1
2313 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// First visible part of the condition: MT is a computation that is not
// evaluated beforehand.
2315 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2317 DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2319 DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
// Non-vectorized default assignment kernel (active when
// UseVectorizedDefaultKernel does not hold): delegates to the target's own
// assign with the expression x * A * scalar.
2337 template<
typename VT1
2341 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2342 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2344 y.assign( x * A * scalar );
// Vectorized default assignment kernel of the scaled product, enabled when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
// Columns of A are handled in blocks of 8, 4, 3 and 2, followed by code for a
// single column. Every inner loop walks the M rows in steps of IT::size; each
// reduced accumulator is multiplied by scalar before it is stored into y.
2362 template<
typename VT1
2366 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2367 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2369 typedef IntrinsicTrait<ElementType> IT;
2371 const size_t M( A.rows() );
2372 const size_t N( A.columns() );
// Eight columns per iteration, one IntrinsicType accumulator per column.
2376 for( ; (j+8UL) <= N; j+=8UL ) {
2377 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2378 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2380 xmm1 = xmm1 + x1 * A.load(i,j );
2381 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2382 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2383 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2384 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2385 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2386 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2387 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2389 y[j ] =
sum( xmm1 ) * scalar;
2390 y[j+1UL] =
sum( xmm2 ) * scalar;
2391 y[j+2UL] =
sum( xmm3 ) * scalar;
2392 y[j+3UL] =
sum( xmm4 ) * scalar;
2393 y[j+4UL] =
sum( xmm5 ) * scalar;
2394 y[j+5UL] =
sum( xmm6 ) * scalar;
2395 y[j+6UL] =
sum( xmm7 ) * scalar;
2396 y[j+7UL] =
sum( xmm8 ) * scalar;
// Four columns per iteration.
2398 for( ; (j+4UL) <= N; j+=4UL ) {
2400 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2402 xmm1 = xmm1 + x1 * A.load(i,j );
2403 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2404 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2405 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2407 y[j ] =
sum( xmm1 ) * scalar;
2408 y[j+1UL] =
sum( xmm2 ) * scalar;
2409 y[j+2UL] =
sum( xmm3 ) * scalar;
2410 y[j+3UL] =
sum( xmm4 ) * scalar;
// Three columns per iteration.
2412 for( ; (j+3UL) <= N; j+=3UL ) {
2414 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2416 xmm1 = xmm1 + x1 * A.load(i,j );
2417 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2418 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2420 y[j ] =
sum( xmm1 ) * scalar;
2421 y[j+1UL] =
sum( xmm2 ) * scalar;
2422 y[j+2UL] =
sum( xmm3 ) * scalar;
// Two columns per iteration.
2424 for( ; (j+2UL) <= N; j+=2UL ) {
2426 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2428 xmm1 = xmm1 + x1 * A.load(i,j );
2429 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2431 y[j ] =
sum( xmm1 ) * scalar;
2432 y[j+1UL] =
sum( xmm2 ) * scalar;
// Single column: the vector chunk is loaded directly inside the product.
2436 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2437 xmm1 = xmm1 + A.load(i,j) * x.load(i);
2439 y[j] =
sum( xmm1 ) * scalar;
// BLAS selector fallback: when UseDefaultKernel<VT1,VT2,MT1,ST2> holds, the
// call is forwarded unchanged to selectDefaultAssignKernel.
2457 template<
typename VT1
2461 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2462 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2464 selectDefaultAssignKernel( y, x, A, scalar );
// BLAS selector for float operands: calls sgemv with alpha = scalar and
// beta = 0.
2483 template<
typename VT1
2487 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2488 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2490 sgemv( y, x, A, scalar, 0.0F );
// BLAS selector for double operands: calls dgemv with alpha = scalar and
// beta = 0.
2510 template<
typename VT1
2514 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2515 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2517 dgemv( y, x, A, scalar, 0.0 );
// BLAS selector for complex<float> operands: calls cgemv with
// alpha = (scalar,0) and beta = (0,0); the scalar becomes the real part.
2538 template<
typename VT1
2542 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2543 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2545 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS selector for complex<double> operands: calls zgemv with
// alpha = (scalar,0) and beta = (0,0); the scalar becomes the real part.
2566 template<
typename VT1
2570 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2571 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2573 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of the scaled product to a sparse vector.
// lhs: target sparse vector; rhs: the scaled product expression.
2590 template<
typename VT1
2592 friend inline void assign( SparseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of the scaled product to a dense vector.
// lhs: target dense vector; rhs: the scaled product expression.
2619 template<
typename VT1
2621 friend inline void addAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the vector (left) and matrix (right) operands of the wrapped product.
2627 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2628 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix operand without rows or without columns is handled by one branch.
2630 if( right.rows() == 0UL || right.columns() == 0UL ) {
// Regular case: dispatch to the kernel selection, passing the scalar along.
2642 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Chooses between the default and the BLAS-based addition assignment kernel
// for the scaled product. y is the target, x and A the operands, scalar the
// factor.
2657 template<
typename VT1
2661 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// First visible part of the condition: MT is a computation that is not
// evaluated beforehand.
2663 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2665 DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2667 DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
// Non-vectorized default addition assignment kernel (active when
// UseVectorizedDefaultKernel does not hold): delegates to the target's own
// addAssign with the expression x * A * scalar.
2685 template<
typename VT1
2689 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2690 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2692 y.addAssign( x * A * scalar );
// Vectorized default addition assignment kernel of the scaled product, enabled
// when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
// Columns of A are handled in blocks of 8, 4, 3 and 2, followed by code for a
// single column. Every inner loop walks the M rows in steps of IT::size; each
// reduced accumulator is multiplied by scalar and added to the element of y.
2710 template<
typename VT1
2714 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2715 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2717 typedef IntrinsicTrait<ElementType> IT;
2719 const size_t M( A.rows() );
2720 const size_t N( A.columns() );
// Eight columns per iteration, one IntrinsicType accumulator per column.
2724 for( ; (j+8UL) <= N; j+=8UL ) {
2725 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2726 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2728 xmm1 = xmm1 + x1 * A.load(i,j );
2729 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2730 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2731 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2732 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
2733 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
2734 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
2735 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
2737 y[j ] +=
sum( xmm1 ) * scalar;
2738 y[j+1UL] +=
sum( xmm2 ) * scalar;
2739 y[j+2UL] +=
sum( xmm3 ) * scalar;
2740 y[j+3UL] +=
sum( xmm4 ) * scalar;
2741 y[j+4UL] +=
sum( xmm5 ) * scalar;
2742 y[j+5UL] +=
sum( xmm6 ) * scalar;
2743 y[j+6UL] +=
sum( xmm7 ) * scalar;
2744 y[j+7UL] +=
sum( xmm8 ) * scalar;
// Four columns per iteration.
2746 for( ; (j+4UL) <= N; j+=4UL ) {
2748 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2750 xmm1 = xmm1 + x1 * A.load(i,j );
2751 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2752 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2753 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
2755 y[j ] +=
sum( xmm1 ) * scalar;
2756 y[j+1UL] +=
sum( xmm2 ) * scalar;
2757 y[j+2UL] +=
sum( xmm3 ) * scalar;
2758 y[j+3UL] +=
sum( xmm4 ) * scalar;
// Three columns per iteration.
2760 for( ; (j+3UL) <= N; j+=3UL ) {
2762 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2764 xmm1 = xmm1 + x1 * A.load(i,j );
2765 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2766 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
2768 y[j ] +=
sum( xmm1 ) * scalar;
2769 y[j+1UL] +=
sum( xmm2 ) * scalar;
2770 y[j+2UL] +=
sum( xmm3 ) * scalar;
// Two columns per iteration.
2772 for( ; (j+2UL) <= N; j+=2UL ) {
2774 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2776 xmm1 = xmm1 + x1 * A.load(i,j );
2777 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
2779 y[j ] +=
sum( xmm1 ) * scalar;
2780 y[j+1UL] +=
sum( xmm2 ) * scalar;
// Single column: the vector chunk is loaded directly inside the product.
2784 for(
size_t i=0UL; i<M; i+=
IT::size ) {
2785 xmm1 = xmm1 + A.load(i,j) * x.load(i);
2787 y[j] +=
sum( xmm1 ) * scalar;
// BLAS selector fallback: when UseDefaultKernel<VT1,VT2,MT1,ST2> holds, the
// call is forwarded unchanged to selectDefaultAddAssignKernel.
2806 template<
typename VT1
2810 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2811 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2813 selectDefaultAddAssignKernel( y, x, A, scalar );
// BLAS selector for float operands: calls sgemv with alpha = scalar and
// beta = 1, so the scaled product is accumulated onto y.
2832 template<
typename VT1
2836 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2837 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2839 sgemv( y, x, A, scalar, 1.0F );
// BLAS selector for double operands: calls dgemv with alpha = scalar and
// beta = 1, so the scaled product is accumulated onto y.
2859 template<
typename VT1
2863 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2864 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2866 dgemv( y, x, A, scalar, 1.0 );
// BLAS selector for complex<float> operands: calls cgemv with
// alpha = (scalar,0) and beta = (1,0).
2887 template<
typename VT1
2891 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2892 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2894 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS selector for complex<double> operands: calls zgemv with
// alpha = (scalar,0) and beta = (1,0).
2915 template<
typename VT1
2919 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2920 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2922 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the scaled product to a dense vector.
// lhs: target dense vector; rhs: the scaled product expression.
2943 template<
typename VT1
2945 friend inline void subAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the vector (left) and matrix (right) operands of the wrapped product.
2951 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2952 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix operand without rows or without columns is handled by one branch.
2954 if( right.rows() == 0UL || right.columns() == 0UL ) {
// Regular case: dispatch to the kernel selection, passing the scalar along.
2966 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Chooses between the default and the BLAS subtraction-assignment kernel.
// The visible part of the condition selects the default kernel when the
// matrix operand is a computation that is not evaluated
// ( IsComputation<MT>::value && !evaluateMatrix ).
// NOTE(review): the second half of the condition and the `else` line are
// omitted from this listing -- confirm against the complete header.
2981 template<
typename VT1
2985 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2987 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2989 DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
2991 DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
// Non-vectorized default subtraction-assignment kernel. This overload is
// active when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is NOT satisfied
// (DisableIf) and simply subtracts the expression x * A * scalar from y via
// y.subAssign().
3009 template<
typename VT1
3013 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3014 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3016 y.subAssign( x * A * scalar );
// Vectorized default subtraction-assignment kernel, active when
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> is satisfied (EnableIf).
//
// M and N are the row and column counts of A. The columns of A are walked in
// blocks of 8, 4, 3 and 2 columns, followed by a final single-column loop.
// For each block the inner loop steps over the rows in strides of IT::size,
// accumulates the products of the loaded vector chunk and the loaded matrix
// chunks into one intrinsic accumulator per column, and afterwards subtracts
// sum( accumulator ) * scalar from the matching element of y.
//
// NOTE(review): this listing omits several lines (the declaration of `j`, the
// per-iteration definition of `x1`, the accumulator declarations of the
// smaller blocks, closing braces and the guard around the last loop).
// Presumably x1 is x.load(i) and the accumulators start from zero -- confirm
// against the complete header.
3034 template<
typename VT1
3038 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3039 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3041 typedef IntrinsicTrait<ElementType> IT;
3043 const size_t M( A.rows() );
3044 const size_t N( A.columns() );
// Blocks of eight columns.
3048 for( ; (j+8UL) <= N; j+=8UL ) {
3049 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3050 for(
size_t i=0UL; i<M; i+=
IT::size ) {
3052 xmm1 = xmm1 + x1 * A.load(i,j );
3053 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3054 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3055 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3056 xmm5 = xmm5 + x1 * A.load(i,j+4UL);
3057 xmm6 = xmm6 + x1 * A.load(i,j+5UL);
3058 xmm7 = xmm7 + x1 * A.load(i,j+6UL);
3059 xmm8 = xmm8 + x1 * A.load(i,j+7UL);
3061 y[j ] -=
sum( xmm1 ) * scalar;
3062 y[j+1UL] -=
sum( xmm2 ) * scalar;
3063 y[j+2UL] -=
sum( xmm3 ) * scalar;
3064 y[j+3UL] -=
sum( xmm4 ) * scalar;
3065 y[j+4UL] -=
sum( xmm5 ) * scalar;
3066 y[j+5UL] -=
sum( xmm6 ) * scalar;
3067 y[j+6UL] -=
sum( xmm7 ) * scalar;
3068 y[j+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of four columns.
3070 for( ; (j+4UL) <= N; j+=4UL ) {
3072 for(
size_t i=0UL; i<M; i+=
IT::size ) {
3074 xmm1 = xmm1 + x1 * A.load(i,j );
3075 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3076 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3077 xmm4 = xmm4 + x1 * A.load(i,j+3UL);
3079 y[j ] -=
sum( xmm1 ) * scalar;
3080 y[j+1UL] -=
sum( xmm2 ) * scalar;
3081 y[j+2UL] -=
sum( xmm3 ) * scalar;
3082 y[j+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of three columns.
3084 for( ; (j+3UL) <= N; j+=3UL ) {
3086 for(
size_t i=0UL; i<M; i+=
IT::size ) {
3088 xmm1 = xmm1 + x1 * A.load(i,j );
3089 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3090 xmm3 = xmm3 + x1 * A.load(i,j+2UL);
3092 y[j ] -=
sum( xmm1 ) * scalar;
3093 y[j+1UL] -=
sum( xmm2 ) * scalar;
3094 y[j+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of two columns.
3096 for( ; (j+2UL) <= N; j+=2UL ) {
3098 for(
size_t i=0UL; i<M; i+=
IT::size ) {
3100 xmm1 = xmm1 + x1 * A.load(i,j );
3101 xmm2 = xmm2 + x1 * A.load(i,j+1UL);
3103 y[j ] -=
sum( xmm1 ) * scalar;
3104 y[j+1UL] -=
sum( xmm2 ) * scalar;
// Single column; here the vector chunk is loaded inline via x.load(i).
// NOTE(review): presumably guarded by a check for one remaining column.
3108 for(
size_t i=0UL; i<M; i+=
IT::size ) {
3109 xmm1 = xmm1 + A.load(i,j) * x.load(i);
3111 y[j] -=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS subtraction-assignment dispatch. It is enabled
// through UseDefaultKernel<VT1,VT2,MT1,ST2> and forwards all four arguments
// unchanged to selectDefaultSubAssignKernel().
// NOTE(review): this listing omits lines (remaining template parameters,
// braces) -- confirm against the complete header.
3131 template<
typename VT1
3135 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3136 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3138 selectDefaultSubAssignKernel( y, x, A, scalar );
// BLAS subtraction-assignment overload enabled through
// UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>. Delegates to sgemv() with the
// NEGATED scalar and the constant 1.0F as the last two arguments; negating the
// scalar is what turns the call into a subtraction.
// NOTE(review): presumably alpha = -scalar and beta = 1.0F in the BLAS sense --
// confirm against the sgemv() wrapper.
3157 template<
typename VT1
3161 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3162 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3164 sgemv( y, x, A, -scalar, 1.0F );
// BLAS subtraction-assignment overload enabled through
// UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>. Delegates to dgemv() with the
// NEGATED scalar and the constant 1.0 as the last two arguments; negating the
// scalar is what turns the call into a subtraction.
// NOTE(review): presumably alpha = -scalar and beta = 1.0 in the BLAS sense --
// confirm against the dgemv() wrapper.
3184 template<
typename VT1
3188 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3189 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3191 dgemv( y, x, A, -scalar, 1.0 );
// BLAS subtraction-assignment overload enabled through
// UseSinglePrecisionComplexKernel<VT1,VT2,MT1>. Delegates to cgemv(); the
// negated scalar is widened to complex<float> with a zero imaginary part, and
// the last argument is the complex constant (1, 0).
// NOTE(review): presumably alpha/beta in the BLAS sense -- confirm against the
// cgemv() wrapper.
3213 template<
typename VT1
3217 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3218 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3220 cgemv( y, x, A, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS subtraction-assignment overload enabled through
// UseDoublePrecisionComplexKernel<VT1,VT2,MT1>. Delegates to zgemv(); the
// negated scalar is widened to complex<double> with a zero imaginary part, and
// the last argument is the complex constant (1, 0).
// NOTE(review): presumably alpha/beta in the BLAS sense -- confirm against the
// zgemv() wrapper.
3242 template<
typename VT1
3246 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3247 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3249 zgemv( y, x, A, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Multiplication assignment of a scaled vector/matrix product (rhs) to a dense
// vector (lhs).
// NOTE(review): only the signature survives in this listing; the body is not
// shown, so its evaluation strategy cannot be documented from here.
3270 template<
typename VT1
3272 friend inline void multAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// SMP assignment of a scaled vector/matrix product (rhs) to a dense vector
// (lhs); only available when UseSMPAssign<VT1> is satisfied (EnableIf).
// The operands of the wrapped product are fetched from rhs.vector_. Unlike the
// add/sub variants, the zero-rows and zero-columns cases are tested in two
// separate branches.
// NOTE(review): the branch bodies and the remainder of the function are
// omitted from this listing -- confirm what each branch does.
3305 template<
typename VT1
3307 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3308 smpAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
3314 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3315 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3317 if( right.rows() == 0UL ) {
3321 else if( right.columns() == 0UL ) {
// SMP assignment of a scaled vector/matrix product (rhs) to a SPARSE vector
// (lhs); only available when UseSMPAssign<VT1> is satisfied (EnableIf).
// NOTE(review): only the signature survives in this listing; the body is not
// shown.
3351 template<
typename VT1
3353 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3354 smpAssign( SparseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// SMP addition assignment of a scaled vector/matrix product (rhs) to a dense
// vector (lhs); only available when UseSMPAssign<VT1> is satisfied (EnableIf).
// The operands of the wrapped product are fetched from rhs.vector_ and an
// empty matrix (zero rows or zero columns) is handled by a dedicated branch.
// NOTE(review): the branch body and the remainder of the function are omitted
// from this listing; presumably the branch returns early -- confirm.
3383 template<
typename VT1
3385 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3386 smpAddAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
3392 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3393 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3395 if( right.rows() == 0UL || right.columns() == 0UL ) {
// SMP subtraction assignment of a scaled vector/matrix product (rhs) to a
// dense vector (lhs); only available when UseSMPAssign<VT1> is satisfied
// (EnableIf). The operands of the wrapped product are fetched from rhs.vector_
// and an empty matrix (zero rows or zero columns) is handled by a dedicated
// branch.
// NOTE(review): the branch body and the remainder of the function are omitted
// from this listing; presumably the branch returns early -- confirm.
3429 template<
typename VT1
3431 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3432 smpSubAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
3438 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3439 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3441 if( right.rows() == 0UL || right.columns() == 0UL ) {
// SMP multiplication assignment of a scaled vector/matrix product (rhs) to a
// dense vector (lhs); only available when UseSMPAssign<VT1> is satisfied
// (EnableIf).
// NOTE(review): only the signature survives in this listing; the body is not
// shown.
3475 template<
typename VT1
3477 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3478 smpMultAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
// Free function creating a TDVecTDMatMultExpr<T1,T2>; it is excluded from
// overload resolution when T2 is a matrix/matrix multiplication expression
// (DisableIf< IsMatMatMultExpr<T2>, ... >).
// Throws std::invalid_argument if the size of the vector differs from the
// number of rows of the matrix.
// NOTE(review): the function name and parameter list are omitted from this
// listing; presumably this is operator*( vec, mat ) -- confirm.
3551 template<
typename T1
3553 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecTDMatMultExpr<T1,T2> >::Type
3558 if( (~vec).
size() != (~mat).
rows() )
3559 throw std::invalid_argument(
"Vector and matrix sizes do not match" );
// Trait specialization that inherits its result from Columns<MT>, i.e. it is
// answered by the column count of the matrix operand type.
// NOTE(review): the struct header is omitted from this listing; presumably
// this is the Size specialization for TDVecTDMatMultExpr<VT,MT> -- confirm.
3576 template<
typename MT,
typename VT >
3578 :
public Columns<MT>
// Expression-trait specialization: the resulting Type is the product type of
// VT and the AF-flagged submatrix type of const MT, as computed by
// MultExprTrait and SubmatrixExprTrait.
// NOTE(review): the struct header is omitted from this listing; presumably
// this is the SubvectorExprTrait specialization for TDVecTDMatMultExpr, and AF
// is the alignment flag -- confirm.
3594 template<
typename VT,
typename MT,
bool AF >
3599 typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:254
Expression object for transpose dense vector-transpose dense matrix multiplications. The TDVecTDMatMultExpr class represents the compile time expression for multiplications between transpose dense vectors and column-major dense matrices.
Definition: Forward.h:135
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecTDMatMultExpr.h:402
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:336
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDVecTDMatMultExpr.h:346
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
const size_t TDVECTDMATMULT_THRESHOLD
Dense Vector/column-major dense matrix multiplication threshold.This setting specifies the threshold ...
Definition: Thresholds.h:108
Constraint on the data type.
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:118
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:116
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecTDMatMultExpr.h:263
Header file for the DenseVector base class.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:257
Header file for the RequiresEvaluation type trait.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:259
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecTDMatMultExpr.h:380
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecTDMatMultExpr.h:358
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecTDMatMultExpr.h:248
Header file for the IsMatMatMultExpr type trait class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
Header file for the IsBlasCompatible type trait.
TDVecTDMatMultExpr< VT, MT > This
Type of this TDVecTDMatMultExpr instance.
Definition: TDVecTDMatMultExpr.h:245
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:117
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecTDMatMultExpr.h:249
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecTDMatMultExpr.h:260
Constraints on the storage order of matrix types.
Constraint on the data type.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecTDMatMultExpr.h:114
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:113
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecTDMatMultExpr.h:299
Header file for the EnableIf class template.
Header file for the serial shim.
Header file for the IsNumeric type trait.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecTDMatMultExpr.h:390
System settings for the BLAS mode.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:400
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecTDMatMultExpr.h:251
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:250
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecTDMatMultExpr.h:115
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:247
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecTDMatMultExpr.h:401
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecTDMatMultExpr.h:370
Header file for the IsComputation type trait class.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecTDMatMultExpr.h:246
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold.This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
TDVecTDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecTDMatMultExpr class.
Definition: TDVecTDMatMultExpr.h:284
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
const size_t SMP_TDVECTDMATMULT_THRESHOLD
SMP dense vector/column-major dense matrix multiplication threshold.This threshold specifies when a d...
Definition: Thresholds.h:391
Constraint on the data type.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecTDMatMultExpr.h:326
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849