35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
44 #include <boost/cast.hpp>
101 template<
typename MT
// Compile-time trait: value is nonzero only if all three operand types are
// vectorizable and IsFloat holds for each of their element types. In this file it
// gates the overloads that call cblas_sgemv.
134 template<
typename T1,
typename T2,
typename T3 >
135 struct UseSinglePrecisionKernel {
136 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
137 IsFloat<typename T1::ElementType>::value &&
138 IsFloat<typename T2::ElementType>::value &&
139 IsFloat<typename T3::ElementType>::value };
// Compile-time trait: value is nonzero only if all three operand types are
// vectorizable and IsDouble holds for each of their element types. In this file it
// gates the overloads that call cblas_dgemv.
150 template<
typename T1,
typename T2,
typename T3 >
151 struct UseDoublePrecisionKernel {
152 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
153 IsDouble<typename T1::ElementType>::value &&
154 IsDouble<typename T2::ElementType>::value &&
155 IsDouble<typename T3::ElementType>::value };
// Compile-time trait: value is nonzero only if all three operand types are
// vectorizable and each element type is exactly complex<float>. In this file it
// gates the overloads that call cblas_cgemv.
166 template<
typename T1,
typename T2,
typename T3 >
167 struct UseSinglePrecisionComplexKernel {
// Element type every operand must match.
168 typedef complex<float> Type;
169 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
170 IsSame<typename T1::ElementType,Type>::value &&
171 IsSame<typename T2::ElementType,Type>::value &&
172 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: value is nonzero only if all three operand types are
// vectorizable and each element type is exactly complex<double>. In this file it
// gates the overloads that call cblas_zgemv.
183 template<
typename T1,
typename T2,
typename T3 >
184 struct UseDoublePrecisionComplexKernel {
// Element type every operand must match.
185 typedef complex<double> Type;
186 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187 IsSame<typename T1::ElementType,Type>::value &&
188 IsSame<typename T2::ElementType,Type>::value &&
189 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the non-BLAS fallback: value is nonzero when
// BLAZE_BLAS_MODE evaluates to false, or when none of the four BLAS kernel traits
// (single, double, complex<float>, complex<double>) applies to the operand types.
199 template<
typename T1,
typename T2,
typename T3 >
200 struct UseDefaultKernel {
201 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
202 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
203 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
204 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait for the intrinsics-based default kernel: requires all three
// operand types to be vectorizable, to share one element type (T2 and T3 are both
// compared against T1), and IntrinsicTrait of that element type to report both
// addition and multiplication.
215 template<
typename T1,
typename T2,
typename T3 >
216 struct UseVectorizedDefaultKernel {
217 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
218 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
219 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
220 IntrinsicTrait<typename T1::ElementType>::addition &&
221 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compile-time flags of the expression type; both are fixed to 0 here.
251 enum { vectorizable = 0 };
254 enum { smpAssignable = 0 };
// NOTE(review): fragment of an element computation; the enclosing signature, the
// definition of end_ and the loop bodies are not visible in this excerpt.
// Work is only done when the matrix has at least one column.
283 if(
mat_.columns() != 0UL ) {
// Loop over j starting at 1 in steps of 2 while j < end_.
285 for(
size_t j=1UL; j<
end_; j+=2UL ) {
// Taken when end_ is smaller than the column count (presumably a leftover column
// after the pairwise loop -- confirm against the full source).
288 if( end_ <
mat_.columns() ) {
// NOTE(review): the function name/signature line is missing from this excerpt
// (presumably canAlias -- confirm). The visible statement reports true when either
// the matrix operand or the vector operand is aliased with the given address.
336 template<
typename T >
338 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
// NOTE(review): the function name/signature line is missing from this excerpt
// (presumably isAliased -- confirm). The visible statement reports true when either
// the matrix operand or the vector operand is aliased with the given address.
348 template<
typename T >
350 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
// NOTE(review): fragment of the dense-vector assignment of the product (signature
// line not visible; name presumed to be assign).
374 template<
typename VT1 >
// Special case: matrix with zero rows (branch body not visible).
381 if( rhs.
mat_.rows() == 0UL ) {
// Special case: matrix with zero columns (branch body not visible).
384 else if( rhs.
mat_.columns() == 0UL ) {
// Dispatch: one path uses the default kernel, the other the BLAS kernel selection.
// The condition choosing between them is not visible here.
399 DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
401 DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
// Default assignment kernel for y = A * x.
// NOTE(review): the return-type/enable line and the body are not visible in this
// excerpt; presumably this is the non-vectorized counterpart of the overload
// enabled by UseVectorizedDefaultKernel -- confirm.
//   y : target vector, A : matrix operand, x : vector operand.
420 template<
typename VT1
424 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// Vectorized default assignment kernel for y = A * x, enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a remainder loop.
// For each row an intrinsic accumulator collects A.load(row,j) * (x chunk) over the
// columns; sum() reduces the accumulator into the corresponding element of y.
// NOTE(review): the declarations of i and x1 are not visible in this excerpt
// (x1 is presumably x.load(j), as used in the remainder loop -- confirm).
// NOTE(review): the column loops step by IT::size with no remainder handling, which
// assumes padded storage of A and x -- TODO confirm.
445 template<
typename VT1
448 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
449 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
451 typedef IntrinsicTrait<ElementType> IT;
453 const size_t M( A.rows() );
454 const size_t N( A.columns() );
// Block of 8 rows per iteration.
458 for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero -- confirm.
459 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
460 for(
size_t j=0UL; j<N; j+=IT::size ) {
462 xmm1 = xmm1 + A.load(i ,j) * x1;
463 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
464 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
465 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
466 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
467 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
468 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
469 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Horizontal reduction of each accumulator into its target element.
472 y[i+1UL] =
sum( xmm2 );
473 y[i+2UL] =
sum( xmm3 );
474 y[i+3UL] =
sum( xmm4 );
475 y[i+4UL] =
sum( xmm5 );
476 y[i+5UL] =
sum( xmm6 );
477 y[i+6UL] =
sum( xmm7 );
478 y[i+7UL] =
sum( xmm8 );
// Block of 4 rows per iteration.
480 for( ; (i+4UL) <= M; i+=4UL ) {
482 for(
size_t j=0UL; j<N; j+=IT::size ) {
484 xmm1 = xmm1 + A.load(i ,j) * x1;
485 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
486 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
487 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
490 y[i+1UL] =
sum( xmm2 );
491 y[i+2UL] =
sum( xmm3 );
492 y[i+3UL] =
sum( xmm4 );
// Block of 3 rows per iteration.
494 for( ; (i+3UL) <= M; i+=3UL ) {
496 for(
size_t j=0UL; j<N; j+=IT::size ) {
498 xmm1 = xmm1 + A.load(i ,j) * x1;
499 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
500 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
503 y[i+1UL] =
sum( xmm2 );
504 y[i+2UL] =
sum( xmm3 );
// Block of 2 rows per iteration.
506 for( ; (i+2UL) <= M; i+=2UL ) {
508 for(
size_t j=0UL; j<N; j+=IT::size ) {
510 xmm1 = xmm1 + A.load(i ,j) * x1;
511 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
514 y[i+1UL] =
sum( xmm2 );
// Remaining single row: the x chunk is loaded inline.
518 for(
size_t j=0UL; j<N; j+=IT::size ) {
519 xmm1 = xmm1 + A.load(i,j) * x.load(j);
// BLAS-selection overload used when UseDefaultKernel<VT1,MT1,VT2> holds (BLAS mode
// off or no matching BLAS kernel): simply forwards to the default assignment kernel.
541 template<
typename VT1
544 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
545 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
547 selectDefaultAssignKernel( y, A, x );
// BLAS assignment kernel for single precision operands
// (enabled by UseSinglePrecisionKernel<VT1,MT1,VT2>): y = A * x via cblas_sgemv.
567 template<
typename VT1
570 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
571 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
573 using boost::numeric_cast;
// CBLAS takes int dimensions; the size values are converted with the range-checked
// boost::numeric_cast. lda is taken from A.spacing().
579 const int M ( numeric_cast<int>( A.rows() ) );
580 const int N ( numeric_cast<int>( A.columns() ) );
581 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1, beta = 0: the previous contents of y are overwritten. Unit strides.
583 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
584 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// BLAS assignment kernel for double precision operands
// (enabled by UseDoublePrecisionKernel<VT1,MT1,VT2>): y = A * x via cblas_dgemv.
605 template<
typename VT1
608 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
609 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
611 using boost::numeric_cast;
// CBLAS takes int dimensions; the size values are converted with the range-checked
// boost::numeric_cast. lda is taken from A.spacing().
617 const int M ( numeric_cast<int>( A.rows() ) );
618 const int N ( numeric_cast<int>( A.columns() ) );
619 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1, beta = 0: the previous contents of y are overwritten. Unit strides.
621 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
622 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// BLAS assignment kernel for complex<float> operands
// (enabled by UseSinglePrecisionComplexKernel<VT1,MT1,VT2>): y = A * x via cblas_cgemv.
643 template<
typename VT1
646 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
647 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
649 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
658 const int M ( numeric_cast<int>( A.rows() ) );
659 const int N ( numeric_cast<int>( A.columns() ) );
660 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors are passed to CBLAS by address: alpha = 1, beta = 0.
661 const complex<float> alpha( 1.0F, 0.0F );
662 const complex<float> beta ( 0.0F, 0.0F );
664 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
665 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS assignment kernel for complex<double> operands
// (enabled by UseDoublePrecisionComplexKernel<VT1,MT1,VT2>): y = A * x via cblas_zgemv.
686 template<
typename VT1
689 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
690 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
692 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
701 const int M ( numeric_cast<int>( A.rows() ) );
702 const int N ( numeric_cast<int>( A.columns() ) );
703 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors are passed to CBLAS by address: alpha = 1, beta = 0.
704 const complex<double> alpha( 1.0, 0.0 );
705 const complex<double> beta ( 0.0, 0.0 );
707 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
708 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
727 template<
typename VT1 >
// NOTE(review): fragment of the dense-vector addition assignment of the product
// (signature line not visible; name presumed to be addAssign).
757 template<
typename VT1 >
// Empty matrix (no rows or no columns): branch body not visible here.
764 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Start of the kernel-selection condition; the remainder of the condition is not
// visible in this excerpt.
776 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
778 DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
780 DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
// Non-vectorized default kernel for y += A * x, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold. Delegates to the target
// vector's addAssign with the product expression A * x.
799 template<
typename VT1
802 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
803 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
805 y.addAssign( A * x );
// Vectorized default kernel for y += A * x, enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a remainder loop.
// Each row's intrinsic accumulator is reduced with sum() and added to y.
// NOTE(review): the declarations of i and x1 are not visible in this excerpt
// (x1 is presumably x.load(j), as used in the remainder loop -- confirm).
// NOTE(review): the column loops step by IT::size with no remainder handling, which
// assumes padded storage of A and x -- TODO confirm.
824 template<
typename VT1
827 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
828 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
830 typedef IntrinsicTrait<ElementType> IT;
832 const size_t M( A.rows() );
833 const size_t N( A.columns() );
// Block of 8 rows per iteration.
837 for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero -- confirm.
838 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
839 for(
size_t j=0UL; j<N; j+=IT::size ) {
841 xmm1 = xmm1 + A.load(i ,j) * x1;
842 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
843 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
844 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
845 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
846 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
847 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
848 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Accumulate the reduced row results into y.
850 y[i ] +=
sum( xmm1 );
851 y[i+1UL] +=
sum( xmm2 );
852 y[i+2UL] +=
sum( xmm3 );
853 y[i+3UL] +=
sum( xmm4 );
854 y[i+4UL] +=
sum( xmm5 );
855 y[i+5UL] +=
sum( xmm6 );
856 y[i+6UL] +=
sum( xmm7 );
857 y[i+7UL] +=
sum( xmm8 );
// Block of 4 rows per iteration.
859 for( ; (i+4UL) <= M; i+=4UL ) {
861 for(
size_t j=0UL; j<N; j+=IT::size ) {
863 xmm1 = xmm1 + A.load(i ,j) * x1;
864 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
865 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
866 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
868 y[i ] +=
sum( xmm1 );
869 y[i+1UL] +=
sum( xmm2 );
870 y[i+2UL] +=
sum( xmm3 );
871 y[i+3UL] +=
sum( xmm4 );
// Block of 3 rows per iteration.
873 for( ; (i+3UL) <= M; i+=3UL ) {
875 for(
size_t j=0UL; j<N; j+=IT::size ) {
877 xmm1 = xmm1 + A.load(i ,j) * x1;
878 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
879 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
881 y[i ] +=
sum( xmm1 );
882 y[i+1UL] +=
sum( xmm2 );
883 y[i+2UL] +=
sum( xmm3 );
// Block of 2 rows per iteration.
885 for( ; (i+2UL) <= M; i+=2UL ) {
887 for(
size_t j=0UL; j<N; j+=IT::size ) {
889 xmm1 = xmm1 + A.load(i ,j) * x1;
890 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
892 y[i ] +=
sum( xmm1 );
893 y[i+1UL] +=
sum( xmm2 );
// Remaining single row: the x chunk is loaded inline.
897 for(
size_t j=0UL; j<N; j+=IT::size ) {
898 xmm1 = xmm1 + A.load(i,j) * x.load(j);
// BLAS-selection overload used when UseDefaultKernel<VT1,MT1,VT2> holds (BLAS mode
// off or no matching BLAS kernel): forwards to the default addition assignment kernel.
920 template<
typename VT1
923 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
924 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
926 selectDefaultAddAssignKernel( y, A, x );
// BLAS addition assignment kernel for single precision operands
// (enabled by UseSinglePrecisionKernel<VT1,MT1,VT2>): y += A * x via cblas_sgemv.
946 template<
typename VT1
949 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
950 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
952 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
958 const int M ( numeric_cast<int>( A.rows() ) );
959 const int N ( numeric_cast<int>( A.columns() ) );
960 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1, beta = 1: the product is added to the existing contents of y.
962 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
963 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS addition assignment kernel for double precision operands
// (enabled by UseDoublePrecisionKernel<VT1,MT1,VT2>): y += A * x via cblas_dgemv.
984 template<
typename VT1
987 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
988 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
990 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
996 const int M ( numeric_cast<int>( A.rows() ) );
997 const int N ( numeric_cast<int>( A.columns() ) );
998 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1, beta = 1: the product is added to the existing contents of y.
1000 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
1001 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS addition assignment kernel for complex<float> operands
// (enabled by UseSinglePrecisionComplexKernel<VT1,MT1,VT2>): y += A * x via cblas_cgemv.
1022 template<
typename VT1
1025 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1026 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1028 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1037 const int M ( numeric_cast<int>( A.rows() ) );
1038 const int N ( numeric_cast<int>( A.columns() ) );
1039 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors passed by address: alpha = 1, beta = 1 (accumulate into y).
1040 const complex<float> alpha( 1.0F, 0.0F );
1041 const complex<float> beta ( 1.0F, 0.0F );
1043 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1044 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS addition assignment kernel for complex<double> operands
// (enabled by UseDoublePrecisionComplexKernel<VT1,MT1,VT2>): y += A * x via cblas_zgemv.
1065 template<
typename VT1
1068 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1069 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1071 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1080 const int M ( numeric_cast<int>( A.rows() ) );
1081 const int N ( numeric_cast<int>( A.columns() ) );
1082 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors passed by address: alpha = 1, beta = 1 (accumulate into y).
1083 const complex<double> alpha( 1.0, 0.0 );
1084 const complex<double> beta ( 1.0, 0.0 );
1086 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1087 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// NOTE(review): fragment of the dense-vector subtraction assignment of the product
// (signature line not visible; name presumed to be subAssign).
1110 template<
typename VT1 >
// Empty matrix (no rows or no columns): branch body not visible here.
1117 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Start of the kernel-selection condition; the remainder of the condition is not
// visible in this excerpt.
1129 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1131 DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1133 DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
// Non-vectorized default kernel for y -= A * x, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> does not hold. Delegates to the target
// vector's subAssign with the product expression A * x.
1152 template<
typename VT1
1155 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1156 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1158 y.subAssign( A * x );
// Vectorized default kernel for y -= A * x, enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a remainder loop.
// Each row's intrinsic accumulator is reduced with sum() and subtracted from y.
// NOTE(review): the declarations of i and x1 are not visible in this excerpt
// (x1 is presumably x.load(j), as used in the remainder loop -- confirm).
// NOTE(review): the column loops step by IT::size with no remainder handling, which
// assumes padded storage of A and x -- TODO confirm.
1177 template<
typename VT1
1180 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1181 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1183 typedef IntrinsicTrait<ElementType> IT;
1185 const size_t M( A.rows() );
1186 const size_t N( A.columns() );
// Block of 8 rows per iteration.
1190 for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero -- confirm.
1191 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1192 for(
size_t j=0UL; j<N; j+=IT::size ) {
1194 xmm1 = xmm1 + A.load(i ,j) * x1;
1195 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1196 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1197 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1198 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1199 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1200 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1201 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Subtract the reduced row results from y.
1203 y[i ] -=
sum( xmm1 );
1204 y[i+1UL] -=
sum( xmm2 );
1205 y[i+2UL] -=
sum( xmm3 );
1206 y[i+3UL] -=
sum( xmm4 );
1207 y[i+4UL] -=
sum( xmm5 );
1208 y[i+5UL] -=
sum( xmm6 );
1209 y[i+6UL] -=
sum( xmm7 );
1210 y[i+7UL] -=
sum( xmm8 );
// Block of 4 rows per iteration.
1212 for( ; (i+4UL) <= M; i+=4UL ) {
1214 for(
size_t j=0UL; j<N; j+=IT::size ) {
1216 xmm1 = xmm1 + A.load(i ,j) * x1;
1217 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1218 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1219 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1221 y[i ] -=
sum( xmm1 );
1222 y[i+1UL] -=
sum( xmm2 );
1223 y[i+2UL] -=
sum( xmm3 );
1224 y[i+3UL] -=
sum( xmm4 );
// Block of 3 rows per iteration.
1226 for( ; (i+3UL) <= M; i+=3UL ) {
1228 for(
size_t j=0UL; j<N; j+=IT::size ) {
1230 xmm1 = xmm1 + A.load(i ,j) * x1;
1231 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1232 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1234 y[i ] -=
sum( xmm1 );
1235 y[i+1UL] -=
sum( xmm2 );
1236 y[i+2UL] -=
sum( xmm3 );
// Block of 2 rows per iteration.
1238 for( ; (i+2UL) <= M; i+=2UL ) {
1240 for(
size_t j=0UL; j<N; j+=IT::size ) {
1242 xmm1 = xmm1 + A.load(i ,j) * x1;
1243 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1245 y[i ] -=
sum( xmm1 );
1246 y[i+1UL] -=
sum( xmm2 );
// Remaining single row: the x chunk is loaded inline.
1250 for(
size_t j=0UL; j<N; j+=IT::size ) {
1251 xmm1 = xmm1 + A.load(i,j) * x.load(j);
1253 y[i] -=
sum( xmm1 );
// BLAS-selection overload used when UseDefaultKernel<VT1,MT1,VT2> holds (BLAS mode
// off or no matching BLAS kernel): forwards to the default subtraction assignment kernel.
1273 template<
typename VT1
1276 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1277 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1279 selectDefaultSubAssignKernel( y, A, x );
// BLAS subtraction assignment kernel for single precision operands
// (enabled by UseSinglePrecisionKernel<VT1,MT1,VT2>): y -= A * x via cblas_sgemv.
1299 template<
typename VT1
1302 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1303 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1305 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1311 const int M ( numeric_cast<int>( A.rows() ) );
1312 const int N ( numeric_cast<int>( A.columns() ) );
1313 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = -1, beta = 1: the product is subtracted from the existing contents of y.
1315 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1316 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS subtraction assignment kernel for double precision operands
// (enabled by UseDoublePrecisionKernel<VT1,MT1,VT2>): y -= A * x via cblas_dgemv.
1337 template<
typename VT1
1340 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1341 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1343 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1349 const int M ( numeric_cast<int>( A.rows() ) );
1350 const int N ( numeric_cast<int>( A.columns() ) );
1351 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = -1, beta = 1: the product is subtracted from the existing contents of y.
1353 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1354 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS subtraction assignment kernel for complex<float> operands
// (enabled by UseSinglePrecisionComplexKernel<VT1,MT1,VT2>): y -= A * x via cblas_cgemv.
1375 template<
typename VT1
1378 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1379 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1381 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1390 const int M ( numeric_cast<int>( A.rows() ) );
1391 const int N ( numeric_cast<int>( A.columns() ) );
1392 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors passed by address: alpha = -1, beta = 1 (subtract from y).
1393 const complex<float> alpha( -1.0F, 0.0F );
1394 const complex<float> beta ( 1.0F, 0.0F );
1396 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1397 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS subtraction assignment kernel for complex<double> operands
// (enabled by UseDoublePrecisionComplexKernel<VT1,MT1,VT2>): y -= A * x via cblas_zgemv.
1418 template<
typename VT1
1421 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1422 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1424 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1433 const int M ( numeric_cast<int>( A.rows() ) );
1434 const int N ( numeric_cast<int>( A.columns() ) );
1435 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex scaling factors passed by address: alpha = -1, beta = 1 (subtract from y).
1436 const complex<double> alpha( -1.0, 0.0 );
1437 const complex<double> beta ( 1.0, 0.0 );
1439 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1440 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1463 template<
typename VT1 >
// Header fragments of the DVecScalarMultExpr specialization for a scaled dense
// matrix / dense vector product. NOTE(review): the remaining template parameters and
// the class-head line itself are not visible in this excerpt.
1512 template<
typename MT
// Base classes: the DenseVector interface plus the VecScalarMultExpr and Computation
// bases (inherited privately).
1516 :
public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1517 ,
private VecScalarMultExpr
1518 ,
private Computation
// Shorthand for the wrapped matrix/vector multiplication expression type.
1522 typedef DMatDVecMultExpr<MT,VT> MVM;
// Nonzero when the matrix operand is a computation that is not vectorizable, the
// element types VET and MET are identical, and VET is BLAS compatible.
1534 enum { evaluateMatrix = IsComputation<MT>::value && !MT::vectorizable &&
1535 IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
// Nonzero when the vector operand is itself a computation.
1540 enum { evaluateVector = IsComputation<VT>::value };
// Compile-time trait (scaled expression): value is nonzero only if T1..T3 are
// vectorizable, IsFloat holds for each of their element types, and the scalar
// type T4 is not complex. Gates the cblas_sgemv overloads taking a scalar.
1548 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1549 struct UseSinglePrecisionKernel {
1550 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1551 IsFloat<typename T1::ElementType>::value &&
1552 IsFloat<typename T2::ElementType>::value &&
1553 IsFloat<typename T3::ElementType>::value &&
1554 !IsComplex<T4>::value };
// Compile-time trait (scaled expression): value is nonzero only if T1..T3 are
// vectorizable, IsDouble holds for each of their element types, and the scalar
// type T4 is not complex. Gates the cblas_dgemv overloads taking a scalar.
1563 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1564 struct UseDoublePrecisionKernel {
1565 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1566 IsDouble<typename T1::ElementType>::value &&
1567 IsDouble<typename T2::ElementType>::value &&
1568 IsDouble<typename T3::ElementType>::value &&
1569 !IsComplex<T4>::value };
// Compile-time trait (scaled expression): value is nonzero only if T1..T3 are
// vectorizable and each element type is exactly complex<float>. The scalar type
// does not take part in this check.
1578 template<
typename T1,
typename T2,
typename T3 >
1579 struct UseSinglePrecisionComplexKernel {
// Element type every operand must match.
1580 typedef complex<float> Type;
1581 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1582 IsSame<typename T1::ElementType,Type>::value &&
1583 IsSame<typename T2::ElementType,Type>::value &&
1584 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait (scaled expression): value is nonzero only if T1..T3 are
// vectorizable and each element type is exactly complex<double>. The scalar type
// does not take part in this check.
1593 template<
typename T1,
typename T2,
typename T3 >
1594 struct UseDoublePrecisionComplexKernel {
// Element type every operand must match.
1595 typedef complex<double> Type;
1596 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1597 IsSame<typename T1::ElementType,Type>::value &&
1598 IsSame<typename T2::ElementType,Type>::value &&
1599 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the non-BLAS fallback for the scaled expression:
// value is nonzero when BLAZE_BLAS_MODE evaluates to false, or when none of the four
// BLAS kernel traits applies. Only the real-valued traits take the scalar type T4.
1607 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1608 struct UseDefaultKernel {
1609 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1610 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1611 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1612 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait for the intrinsics-based default kernel of the scaled
// expression: requires T1..T3 to be vectorizable and to share one element type, the
// scalar type T4 to be that same type, and IntrinsicTrait of the element type to
// report both addition and multiplication.
1621 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1622 struct UseVectorizedDefaultKernel {
1623 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1624 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1625 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1626 IsSame<typename T1::ElementType,T4>::value &&
1627 IntrinsicTrait<typename T1::ElementType>::addition &&
1628 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type aliases of the scaled expression.
// This: the type of this expression specialization.
1634 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// ResultType: result of MultTrait applied to RES and the scalar type ST.
1635 typedef typename MultTrait<RES,ST>::Type
ResultType;
// IntrinsicType: intrinsic (SIMD) type associated with ElementType.
1638 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// LeftOperand: the wrapped matrix/vector product expression (const).
1643 typedef const DMatDVecMultExpr<MT,VT>
LeftOperand;
// LT / RT: const MRT / const VRT when the respective operand must be evaluated
// (evaluateMatrix / evaluateVector), otherwise MCT / VCT.
1649 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
LT;
1652 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
RT;
// Compile-time flags of the expression type; both are fixed to 0 here.
1657 enum { vectorizable = 0 };
1660 enum { smpAssignable = 0 };
// Constructor taking the matrix/vector product expression and the scalar factor.
// NOTE(review): initializer list and body are not visible in this excerpt.
1669 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
// Element access: the element of the wrapped product at the given index, multiplied
// by the scalar. NOTE(review): the enclosing signature is not visible here.
1683 return vector_[index] * scalar_;
// Returns the size of the expression, which is the size of the wrapped product vector.
1692 inline size_t size()
const {
1693 return vector_.size();
// Reports whether the expression can alias with the given address; the query is
// forwarded to the wrapped product expression.
1723 template<
typename T >
1724 inline bool canAlias(
const T* alias )
const {
1725 return vector_.canAlias( alias );
// Reports whether the expression is aliased with the given address; the query is
// forwarded to the wrapped product expression.
1735 template<
typename T >
1736 inline bool isAliased(
const T* alias )
const {
1737 return vector_.isAliased( alias );
// Assignment of the scaled product to a dense vector (fragment; several lines of
// the body are not visible in this excerpt).
1759 template<
typename VT1 >
1760 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Extract the matrix and vector operands from the wrapped product expression.
1766 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1767 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Special cases: matrix with zero rows / zero columns (branch bodies not visible).
1769 if( left.rows() == 0UL ) {
1772 else if( left.columns() == 0UL ) {
// Start of the kernel-selection condition; its remainder is not visible here.
1785 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
// The scalar factor is handed to the selected kernel.
1787 DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1789 DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for y = A * x * scalar, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold. Delegates to the target
// vector's assign with the expression A * x * scalar.
1807 template<
typename VT1
1811 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1812 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1814 y.assign( A * x * scalar );
// Vectorized default kernel for y = A * x * scalar, enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a remainder loop.
// Each row's intrinsic accumulator is reduced with sum(); the scalar is applied once
// per row to the reduced value before it is stored in y.
// NOTE(review): the declarations of i and x1 are not visible in this excerpt
// (x1 is presumably x.load(j), as used in the remainder loop -- confirm).
// NOTE(review): the column loops step by IT::size with no remainder handling, which
// assumes padded storage of A and x -- TODO confirm.
1832 template<
typename VT1
1836 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1837 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1839 typedef IntrinsicTrait<ElementType> IT;
1841 const size_t M( A.rows() );
1842 const size_t N( A.columns() );
// Block of 8 rows per iteration.
1846 for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero -- confirm.
1847 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1848 for(
size_t j=0UL; j<N; j+=IT::size ) {
1850 xmm1 = xmm1 + A.load(i ,j) * x1;
1851 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1852 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1853 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1854 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
1855 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
1856 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
1857 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Store the scaled row results.
1859 y[i ] =
sum( xmm1 ) * scalar;
1860 y[i+1UL] =
sum( xmm2 ) * scalar;
1861 y[i+2UL] =
sum( xmm3 ) * scalar;
1862 y[i+3UL] =
sum( xmm4 ) * scalar;
1863 y[i+4UL] =
sum( xmm5 ) * scalar;
1864 y[i+5UL] =
sum( xmm6 ) * scalar;
1865 y[i+6UL] =
sum( xmm7 ) * scalar;
1866 y[i+7UL] =
sum( xmm8 ) * scalar;
// Block of 4 rows per iteration.
1868 for( ; (i+4UL) <= M; i+=4UL ) {
1870 for(
size_t j=0UL; j<N; j+=IT::size ) {
1872 xmm1 = xmm1 + A.load(i ,j) * x1;
1873 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1874 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1875 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
1877 y[i ] =
sum( xmm1 ) * scalar;
1878 y[i+1UL] =
sum( xmm2 ) * scalar;
1879 y[i+2UL] =
sum( xmm3 ) * scalar;
1880 y[i+3UL] =
sum( xmm4 ) * scalar;
// Block of 3 rows per iteration.
1882 for( ; (i+3UL) <= M; i+=3UL ) {
1884 for(
size_t j=0UL; j<N; j+=IT::size ) {
1886 xmm1 = xmm1 + A.load(i ,j) * x1;
1887 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1888 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
1890 y[i ] =
sum( xmm1 ) * scalar;
1891 y[i+1UL] =
sum( xmm2 ) * scalar;
1892 y[i+2UL] =
sum( xmm3 ) * scalar;
// Block of 2 rows per iteration.
1894 for( ; (i+2UL) <= M; i+=2UL ) {
1896 for(
size_t j=0UL; j<N; j+=IT::size ) {
1898 xmm1 = xmm1 + A.load(i ,j) * x1;
1899 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
1901 y[i ] =
sum( xmm1 ) * scalar;
1902 y[i+1UL] =
sum( xmm2 ) * scalar;
// Remaining single row: the x chunk is loaded inline.
1906 for(
size_t j=0UL; j<N; j+=IT::size ) {
1907 xmm1 = xmm1 + A.load(i,j) * x.load(j);
1909 y[i] =
sum( xmm1 ) * scalar;
// BLAS-selection overload used when UseDefaultKernel<VT1,MT1,VT2,ST2> holds (BLAS
// mode off or no matching BLAS kernel): forwards to the default assignment kernel.
1928 template<
typename VT1
1932 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1933 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1935 selectDefaultAssignKernel( y, A, x, scalar );
// BLAS assignment kernel for single precision operands with a non-complex scalar
// (enabled by UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>): y = scalar * A * x via
// cblas_sgemv.
1954 template<
typename VT1
1958 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1959 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1961 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
1967 const int M ( numeric_cast<int>( A.rows() ) );
1968 const int N ( numeric_cast<int>( A.columns() ) );
1969 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = scalar, beta = 0: the previous contents of y are overwritten.
1971 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1972 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// BLAS assignment kernel for double precision operands with a non-complex scalar
// (enabled by UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>): y = scalar * A * x via
// cblas_dgemv.
1992 template<
typename VT1
1996 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1997 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1999 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2005 const int M ( numeric_cast<int>( A.rows() ) );
2006 const int N ( numeric_cast<int>( A.columns() ) );
2007 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = scalar, beta = 0: the previous contents of y are overwritten.
2009 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2010 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// BLAS assignment kernel for complex<float> operands
// (enabled by UseSinglePrecisionComplexKernel<VT1,MT1,VT2>): y = scalar * A * x via
// cblas_cgemv.
2030 template<
typename VT1
2034 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2035 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2037 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2046 const int M ( numeric_cast<int>( A.rows() ) );
2047 const int N ( numeric_cast<int>( A.columns() ) );
2048 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float> for alpha; beta = 0 overwrites y.
2049 const complex<float> alpha( scalar );
2050 const complex<float> beta ( 0.0F, 0.0F );
2052 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2053 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS assignment kernel for complex<double> operands
// (enabled by UseDoublePrecisionComplexKernel<VT1,MT1,VT2>): y = scalar * A * x via
// cblas_zgemv.
2073 template<
typename VT1
2077 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2078 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2080 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2089 const int M ( numeric_cast<int>( A.rows() ) );
2090 const int N ( numeric_cast<int>( A.columns() ) );
2091 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double> for alpha; beta = 0 overwrites y.
2092 const complex<double> alpha( scalar );
2093 const complex<double> beta ( 0.0, 0.0 );
2095 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2096 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assignment of the scaled product to a sparse vector.
// NOTE(review): the body is not visible in this excerpt.
2113 template<
typename VT1 >
2114 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of the scaled product to a dense vector (fragment; several
// lines of the body are not visible in this excerpt).
2141 template<
typename VT1 >
2142 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Extract the matrix and vector operands from the wrapped product expression.
2148 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2149 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Empty matrix (no rows or no columns): branch body not visible here.
2151 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Start of the kernel-selection condition; its remainder is not visible here.
2163 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2165 DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2167 DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for y += A * x * scalar, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold. Delegates to the target
// vector's addAssign with the expression A * x * scalar.
2185 template<
typename VT1
2189 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2190 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2192 y.addAssign( A * x * scalar );
// Vectorized default kernel for y += A * x * scalar, enabled when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a remainder loop.
// Each row's intrinsic accumulator is reduced with sum(), scaled once, and added to y.
// NOTE(review): the declarations of i and x1 are not visible in this excerpt
// (x1 is presumably x.load(j), as used in the remainder loop -- confirm).
// NOTE(review): the column loops step by IT::size with no remainder handling, which
// assumes padded storage of A and x -- TODO confirm.
2210 template<
typename VT1
2214 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2215 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2217 typedef IntrinsicTrait<ElementType> IT;
2219 const size_t M( A.rows() );
2220 const size_t N( A.columns() );
// Block of 8 rows per iteration.
2224 for( ; (i+8UL) <= M; i+=8UL ) {
// NOTE(review): accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero -- confirm.
2225 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2226 for(
size_t j=0UL; j<N; j+=IT::size ) {
2228 xmm1 = xmm1 + A.load(i ,j) * x1;
2229 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2230 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2231 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2232 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2233 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2234 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2235 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Add the scaled row results to y.
2237 y[i ] +=
sum( xmm1 ) * scalar;
2238 y[i+1UL] +=
sum( xmm2 ) * scalar;
2239 y[i+2UL] +=
sum( xmm3 ) * scalar;
2240 y[i+3UL] +=
sum( xmm4 ) * scalar;
2241 y[i+4UL] +=
sum( xmm5 ) * scalar;
2242 y[i+5UL] +=
sum( xmm6 ) * scalar;
2243 y[i+6UL] +=
sum( xmm7 ) * scalar;
2244 y[i+7UL] +=
sum( xmm8 ) * scalar;
// Block of 4 rows per iteration.
2246 for( ; (i+4UL) <= M; i+=4UL ) {
2248 for(
size_t j=0UL; j<N; j+=IT::size ) {
2250 xmm1 = xmm1 + A.load(i ,j) * x1;
2251 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2252 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2253 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2255 y[i ] +=
sum( xmm1 ) * scalar;
2256 y[i+1UL] +=
sum( xmm2 ) * scalar;
2257 y[i+2UL] +=
sum( xmm3 ) * scalar;
2258 y[i+3UL] +=
sum( xmm4 ) * scalar;
// Block of 3 rows per iteration.
2260 for( ; (i+3UL) <= M; i+=3UL ) {
2262 for(
size_t j=0UL; j<N; j+=IT::size ) {
2264 xmm1 = xmm1 + A.load(i ,j) * x1;
2265 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2266 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2268 y[i ] +=
sum( xmm1 ) * scalar;
2269 y[i+1UL] +=
sum( xmm2 ) * scalar;
2270 y[i+2UL] +=
sum( xmm3 ) * scalar;
// Block of 2 rows per iteration.
2272 for( ; (i+2UL) <= M; i+=2UL ) {
2274 for(
size_t j=0UL; j<N; j+=IT::size ) {
2276 xmm1 = xmm1 + A.load(i ,j) * x1;
2277 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2279 y[i ] +=
sum( xmm1 ) * scalar;
2280 y[i+1UL] +=
sum( xmm2 ) * scalar;
// Remaining single row: the x chunk is loaded inline.
2284 for(
size_t j=0UL; j<N; j+=IT::size ) {
2285 xmm1 = xmm1 + A.load(i,j) * x.load(j);
2287 y[i] +=
sum( xmm1 ) * scalar;
// BLAS-selection overload used when UseDefaultKernel<VT1,MT1,VT2,ST2> holds (BLAS
// mode off or no matching BLAS kernel): forwards to the default addition assignment
// kernel.
2306 template<
typename VT1
2310 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2311 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2313 selectDefaultAddAssignKernel( y, A, x, scalar );
// BLAS addition assignment kernel for single precision operands with a non-complex
// scalar (enabled by UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>):
// y += scalar * A * x via cblas_sgemv.
2332 template<
typename VT1
2336 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2337 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2339 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2345 const int M ( numeric_cast<int>( A.rows() ) );
2346 const int N ( numeric_cast<int>( A.columns() ) );
2347 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = scalar, beta = 1: the scaled product is added to the contents of y.
2349 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2350 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS addition assignment kernel for double precision operands with a non-complex
// scalar (enabled by UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>):
// y += scalar * A * x via cblas_dgemv.
2370 template<
typename VT1
2374 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2375 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2377 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2383 const int M ( numeric_cast<int>( A.rows() ) );
2384 const int N ( numeric_cast<int>( A.columns() ) );
2385 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = scalar, beta = 1: the scaled product is added to the contents of y.
2387 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2388 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS addition assignment kernel for complex<float> operands
// (enabled by UseSinglePrecisionComplexKernel<VT1,MT1,VT2>):
// y += scalar * A * x via cblas_cgemv.
2408 template<
typename VT1
2412 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2413 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2415 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2424 const int M ( numeric_cast<int>( A.rows() ) );
2425 const int N ( numeric_cast<int>( A.columns() ) );
2426 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float> for alpha; beta = 1 accumulates into y.
2427 const complex<float> alpha( scalar );
2428 const complex<float> beta ( 1.0F, 0.0F );
2430 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2431 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS addition assignment kernel for complex<double> operands
// (enabled by UseDoublePrecisionComplexKernel<VT1,MT1,VT2>):
// y += scalar * A * x via cblas_zgemv.
2451 template<
typename VT1
2455 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2456 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2458 using boost::numeric_cast;
// CBLAS takes int dimensions; converted with the range-checked boost::numeric_cast.
2467 const int M ( numeric_cast<int>( A.rows() ) );
2468 const int N ( numeric_cast<int>( A.columns() ) );
2469 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double> for alpha; beta = 1 accumulates into y.
2470 const complex<double> alpha( scalar );
2471 const complex<double> beta ( 1.0, 0.0 );
2473 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2474 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Subtraction assignment of the scaled product to a dense vector (fragment; several
// lines of the body are not visible in this excerpt).
2495 template<
typename VT1 >
2496 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Extract the matrix and vector operands from the wrapped product expression.
2502 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2503 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// Empty matrix (no rows or no columns): branch body not visible here.
2505 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Start of the kernel-selection condition; its remainder is not visible here.
2517 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2519 DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2521 DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for y -= A * x * scalar, selected when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold. Delegates to the target
// vector's subAssign with the expression A * x * scalar.
2539 template<
typename VT1
2543 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2544 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2546 y.subAssign( A * x * scalar );
// Vectorized default subtraction assignment kernel.
// Selected via EnableIf when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is true.
//   y      - target vector; each element is decreased by the scaled row/vector dot product
//   A      - left-hand side matrix operand, read with A.load(row,column)
//   x      - right-hand side vector operand, read with x.load(column)
//   scalar - scaling factor applied to every reduced row sum
// The rows of A are processed in blocks of 8, 4, 3 and 2 rows, followed by a final
// single-row step. Within a block every row has its own intrinsic accumulator, which is
// reduced with sum() once all columns have been visited.
// NOTE(review): several lines are elided in this listing (the declaration of the row index i,
// the accumulator declarations of the smaller blocks, the definition of x1 and the closing
// braces). x1 is presumably x.load(j), matching the final single-row loop - confirm.
// NOTE(review): the column loops advance by IT::size while testing j<N, which presumably
// relies on padded storage of A and x - confirm.
2564 template<
typename VT1
2568 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2569 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2571 typedef IntrinsicTrait<ElementType> IT;
2573 const size_t M( A.rows() );
2574 const size_t N( A.columns() );
// Blocks of eight rows: one accumulator per row.
2578 for( ; (i+8UL) <= M; i+=8UL ) {
2579 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2580 for(
size_t j=0UL; j<N; j+=IT::size ) {
2582 xmm1 = xmm1 + A.load(i ,j) * x1;
2583 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2584 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2585 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2586 xmm5 = xmm5 + A.load(i+4UL,j) * x1;
2587 xmm6 = xmm6 + A.load(i+5UL,j) * x1;
2588 xmm7 = xmm7 + A.load(i+6UL,j) * x1;
2589 xmm8 = xmm8 + A.load(i+7UL,j) * x1;
// Reduce each accumulator, scale it and subtract it from the matching target element.
2591 y[i ] -=
sum( xmm1 ) * scalar;
2592 y[i+1UL] -=
sum( xmm2 ) * scalar;
2593 y[i+2UL] -=
sum( xmm3 ) * scalar;
2594 y[i+3UL] -=
sum( xmm4 ) * scalar;
2595 y[i+4UL] -=
sum( xmm5 ) * scalar;
2596 y[i+5UL] -=
sum( xmm6 ) * scalar;
2597 y[i+6UL] -=
sum( xmm7 ) * scalar;
2598 y[i+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of four rows.
2600 for( ; (i+4UL) <= M; i+=4UL ) {
2602 for(
size_t j=0UL; j<N; j+=IT::size ) {
2604 xmm1 = xmm1 + A.load(i ,j) * x1;
2605 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2606 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2607 xmm4 = xmm4 + A.load(i+3UL,j) * x1;
2609 y[i ] -=
sum( xmm1 ) * scalar;
2610 y[i+1UL] -=
sum( xmm2 ) * scalar;
2611 y[i+2UL] -=
sum( xmm3 ) * scalar;
2612 y[i+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of three rows.
2614 for( ; (i+3UL) <= M; i+=3UL ) {
2616 for(
size_t j=0UL; j<N; j+=IT::size ) {
2618 xmm1 = xmm1 + A.load(i ,j) * x1;
2619 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2620 xmm3 = xmm3 + A.load(i+2UL,j) * x1;
2622 y[i ] -=
sum( xmm1 ) * scalar;
2623 y[i+1UL] -=
sum( xmm2 ) * scalar;
2624 y[i+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of two rows.
2626 for( ; (i+2UL) <= M; i+=2UL ) {
2628 for(
size_t j=0UL; j<N; j+=IT::size ) {
2630 xmm1 = xmm1 + A.load(i ,j) * x1;
2631 xmm2 = xmm2 + A.load(i+1UL,j) * x1;
2633 y[i ] -=
sum( xmm1 ) * scalar;
2634 y[i+1UL] -=
sum( xmm2 ) * scalar;
// Final single-row step; x is loaded directly inside the loop here.
2638 for(
size_t j=0UL; j<N; j+=IT::size ) {
2639 xmm1 = xmm1 + A.load(i,j) * x.load(j);
2641 y[i] -=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseDefaultKernel<VT1,MT1,VT2,ST2> is true; it simply forwards
// all four arguments to selectDefaultSubAssignKernel().
//   y      - target vector
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor
// NOTE(review): part of the template parameter list is elided in this listing.
2660 template<
typename VT1
2664 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2665 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2667 selectDefaultSubAssignKernel( y, A, x, scalar );
// BLAS-based subtraction assignment kernel for single precision operands.
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> is true.
//   y      - target vector, updated in place through y.data()
//   A      - left-hand side matrix operand, read through A.data() with leading dimension A.spacing()
//   x      - right-hand side vector operand, read through x.data()
//   scalar - scaling factor; its negation is handed to BLAS as alpha
// With alpha = -scalar and beta = 1 the gemv operation y := alpha*A*x + beta*y
// amounts to y -= scalar * A * x.
// NOTE(review): part of the template parameter list and of the body is elided in this listing.
2686 template<
typename VT1
2690 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2691 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2693 using boost::numeric_cast;
// The CBLAS interface takes int dimensions; the sizes are converted with boost::numeric_cast.
2699 const int M ( numeric_cast<int>( A.rows() ) );
2700 const int N ( numeric_cast<int>( A.columns() ) );
2701 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed matrix; both vectors are accessed with unit stride.
2703 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2704 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision operands.
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> is true.
//   y      - target vector, updated in place through y.data()
//   A      - left-hand side matrix operand, read through A.data() with leading dimension A.spacing()
//   x      - right-hand side vector operand, read through x.data()
//   scalar - scaling factor; its negation is handed to BLAS as alpha
// With alpha = -scalar and beta = 1 the gemv operation y := alpha*A*x + beta*y
// amounts to y -= scalar * A * x.
// NOTE(review): part of the template parameter list and of the body is elided in this listing.
2724 template<
typename VT1
2728 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2729 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2731 using boost::numeric_cast;
// The CBLAS interface takes int dimensions; the sizes are converted with boost::numeric_cast.
2737 const int M ( numeric_cast<int>( A.rows() ) );
2738 const int N ( numeric_cast<int>( A.columns() ) );
2739 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed matrix; both vectors are accessed with unit stride.
2741 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2742 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS-based subtraction assignment kernel for single precision complex operands.
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> is true.
//   y      - target vector, updated in place through y.data()
//   A      - left-hand side matrix operand, read through A.data() with leading dimension A.spacing()
//   x      - right-hand side vector operand, read through x.data()
//   scalar - scaling factor; its negation is handed to BLAS as alpha
// With alpha = -scalar and beta = (1,0) the gemv operation y := alpha*A*x + beta*y
// amounts to y -= scalar * A * x.
// NOTE(review): part of the template parameter list and of the body is elided in this listing.
2762 template<
typename VT1
2766 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2767 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2769 using boost::numeric_cast;
// The CBLAS interface takes int dimensions; the sizes are converted with boost::numeric_cast.
2778 const int M ( numeric_cast<int>( A.rows() ) );
2779 const int N ( numeric_cast<int>( A.columns() ) );
2780 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv variant receives alpha and beta by address, hence the named locals.
2781 const complex<float> alpha( -scalar );
2782 const complex<float> beta ( 1.0F, 0.0F );
// Row-major, non-transposed matrix; both vectors are accessed with unit stride.
2784 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2785 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision complex operands.
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> is true.
//   y      - target vector, updated in place through y.data()
//   A      - left-hand side matrix operand, read through A.data() with leading dimension A.spacing()
//   x      - right-hand side vector operand, read through x.data()
//   scalar - scaling factor; its negation is handed to BLAS as alpha
// With alpha = -scalar and beta = (1,0) the gemv operation y := alpha*A*x + beta*y
// amounts to y -= scalar * A * x.
// NOTE(review): part of the template parameter list and of the body is elided in this listing.
2805 template<
typename VT1
2809 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2810 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2812 using boost::numeric_cast;
// The CBLAS interface takes int dimensions; the sizes are converted with boost::numeric_cast.
2821 const int M ( numeric_cast<int>( A.rows() ) );
2822 const int N ( numeric_cast<int>( A.columns() ) );
2823 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv variant receives alpha and beta by address, hence the named locals.
2824 const complex<double> alpha( -scalar );
2825 const complex<double> beta ( 1.0, 0.0 );
// Row-major, non-transposed matrix; both vectors are accessed with unit stride.
2827 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2828 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Multiplication assignment of a DVecScalarMultExpr to a dense vector.
//   lhs - target dense vector
//   rhs - scaled expression on the right-hand side
// NOTE(review): only the signature is visible in this listing; the body is elided.
2849 template<
typename VT1 >
2850 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Free function returning a DMatDVecMultExpr<T1,T2>; it is disabled (DisableIf) when T1 is a
// matrix/matrix multiplication expression.
// Throws std::invalid_argument with the message below; the guarding condition is not visible.
// NOTE(review): the function name, its parameter list and most of the body are elided in this
// listing - presumably this is the matrix/vector multiplication operator; confirm.
2922 template<
typename T1
2924 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2930 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
// Overload enabled (EnableIf) when T1 is a matrix/matrix multiplication expression.
// Instead of multiplying the matrix product with the vector, it multiplies the left matrix
// operand with the product of the right matrix operand and the vector: (A*B)*v -> A*(B*v).
// The return type is taken from MultExprTrait<T1,T2>.
// NOTE(review): the function name and its parameter list are elided in this listing.
2958 template<
typename T1
2961 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2966 return (~mat).leftOperand() * ( (~mat).
rightOperand() * vec );
// Expression trait specialization: Type is the multiplication expression type of a submatrix
// of const MT and the vector type VT.
// NOTE(review): the struct header is elided in this listing - presumably a SubvectorExprTrait
// specialization for DMatDVecMultExpr<MT,VT>; confirm.
2981 template<
typename MT,
typename VT >
2986 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT>::Type, VT >::Type Type;
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:240
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:325
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B \cdot C$ ).
Definition: DMatDMatMultExpr.h:3703
DMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the DMatDVecMultExpr class.
Definition: DMatDVecMultExpr.h:263
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
Expression object for dense matrix-dense vector multiplications. The DMatDVecMultExpr class represents...
Definition: DMatDVecMultExpr.h:103
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:109
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDVecMultExpr.h:229
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
Header file for the DenseVector base class.
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
DMatDVecMultExpr< MT, VT > This
Type of this DMatDVecMultExpr instance.
Definition: DMatDVecMultExpr.h:228
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
size_t size() const
Returns the current size/dimension of the vector.
Definition: DMatDVecMultExpr.h:305
const size_t end_
End of the unrolled calculation loop.
Definition: DMatDVecMultExpr.h:358
Constraint on the data type.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:114
Constraint on the data type.
Header file for the MultExprTrait class template.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:111
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:250
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: DMatDVecMultExpr.h:246
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the IsMatMatMultExpr type trait class.
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:113
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:315
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
Base class for all matrix/vector multiplication expression templates. The MatVecMultExpr class serves ...
Definition: MatVecMultExpr.h:66
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDVecMultExpr.h:243
Constraints on the storage order of matrix types.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDVecMultExpr.h:231
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:269
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDVecMultExpr.h:356
Header file for the EnableIf class template.
Header file for the IsNumeric type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: DMatDVecMultExpr.h:357
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDVecMultExpr.h:337
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:110
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDVecMultExpr.h:234
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:239
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDVecMultExpr.h:237
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDVecMultExpr.h:349
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDVecMultExpr.h:230
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDVecMultExpr.h:232
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDVecMultExpr.h:233
size_t columns(const Matrix< MT, SO > &m)
Returns the current number of columns of the matrix.
Definition: Matrix.h:154
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: DMatDVecMultExpr.h:278
Header file for the MatVecMultExpr base class.
const size_t DMATDVECMULT_THRESHOLD
Row-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:51
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: DMatDVecMultExpr.h:112
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.