22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
31 #include <boost/cast.hpp>
// Shorthand typedefs derived from the two operand types MT (matrix) and VT (vector).
// MRT / VRT: result types of the matrix and vector operands.
92 typedef typename MT::ResultType
MRT;
93 typedef typename VT::ResultType
VRT;
// MET / VET: element types of those result types.
94 typedef typename MRT::ElementType
MET;
95 typedef typename VRT::ElementType
VET;
// MCT / VCT: composite types of the matrix and vector operands.
96 typedef typename MT::CompositeType
MCT;
97 typedef typename VT::CompositeType
VCT;
// Compile-time switch for the single precision kernel.
// The visible part of 'value' requires T1, T2 and T3 to be vectorizable; the
// expression continues on lines that are not part of this excerpt.
112 template<
typename T1,
typename T2,
typename T3 >
113 struct UseSinglePrecisionKernel {
114 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time switch for the double precision kernel.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable and the
// element type of each of them satisfies IsDouble.
128 template<
typename T1,
typename T2,
typename T3 >
129 struct UseDoublePrecisionKernel {
130 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
131 IsDouble<typename T1::ElementType>::value &&
132 IsDouble<typename T2::ElementType>::value &&
133 IsDouble<typename T3::ElementType>::value };
// Compile-time switch for the single precision complex kernel.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable and the
// element type of each of them is exactly complex<float>.
144 template<
typename T1,
typename T2,
typename T3 >
145 struct UseSinglePrecisionComplexKernel {
146 typedef complex<float> Type;
147 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
148 IsSame<typename T1::ElementType,Type>::value &&
149 IsSame<typename T2::ElementType,Type>::value &&
150 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the double precision complex kernel.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable and the
// element type of each of them is exactly complex<double>.
161 template<
typename T1,
typename T2,
typename T3 >
162 struct UseDoublePrecisionComplexKernel {
163 typedef complex<double> Type;
164 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
165 IsSame<typename T1::ElementType,Type>::value &&
166 IsSame<typename T2::ElementType,Type>::value &&
167 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the default (non-BLAS) kernel.
// 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific kernel switches applies to the type triple.
177 template<
typename T1,
typename T2,
typename T3 >
178 struct UseDefaultKernel {
179 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
180 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
181 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
182 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable, share the same
// element type, and IntrinsicTrait of that element type reports both 'addition'
// and 'multiplication'.
193 template<
typename T1,
typename T2,
typename T3 >
194 struct UseVectorizedDefaultKernel {
195 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
196 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
197 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
198 IntrinsicTrait<typename T1::ElementType>::addition &&
199 IntrinsicTrait<typename T1::ElementType>::multiplication };
229 enum { vectorizable = 0 };
258 if(
mat_.columns() != 0UL ) {
260 for(
size_t j=1UL; j<
end_; j+=2UL ) {
263 if( end_ <
mat_.columns() ) {
311 template<
typename T >
313 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
323 template<
typename T >
325 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
348 template<
typename VT1 >
355 if( rhs.
mat_.rows() == 0UL ) {
358 else if( rhs.
mat_.columns() == 0UL ) {
373 DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
375 DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
// Overload of selectDefaultAssignKernel taking the target vector y, the matrix
// operand A and the vector operand x.
// NOTE(review): the return type line and the body are not part of this excerpt,
// so the selection condition of this overload cannot be confirmed from here.
394 template<
typename VT1
398 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// Vectorized default assignment kernel, selected through EnableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2>.
// Rows of A are processed in groups of 8, 4, 3 and 2, followed by a loop whose
// header is not visible in this excerpt. Each row gets its own accumulator
// (xmm1..xmm8); the column index j advances by IT::size per step, and sum() of an
// accumulator is stored into the matching element of y.
// NOTE(review): the declarations of i and x1, the accumulators of the smaller
// groups and the stores to y[i] sit on lines that are not part of this excerpt.
419 template<
typename VT1
422 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
423 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
425 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
427 const size_t M( A.rows() );
428 const size_t N( A.columns() );
// Groups of eight rows.
432 for( ; (i+8UL) <= M; i+=8UL ) {
433 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
434 for(
size_t j=0UL; j<N; j+=IT::size ) {
// The same x1 value is multiplied with the entries of all eight rows at column j.
436 xmm1 = xmm1 + A.get(i ,j) * x1;
437 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
438 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
439 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
440 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
441 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
442 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
443 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Store the reduced accumulators, overwriting the elements of y.
446 y[i+1UL] =
sum( xmm2 );
447 y[i+2UL] =
sum( xmm3 );
448 y[i+3UL] =
sum( xmm4 );
449 y[i+4UL] =
sum( xmm5 );
450 y[i+5UL] =
sum( xmm6 );
451 y[i+6UL] =
sum( xmm7 );
452 y[i+7UL] =
sum( xmm8 );
// Groups of four rows.
454 for( ; (i+4UL) <= M; i+=4UL ) {
456 for(
size_t j=0UL; j<N; j+=IT::size ) {
458 xmm1 = xmm1 + A.get(i ,j) * x1;
459 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
460 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
461 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
464 y[i+1UL] =
sum( xmm2 );
465 y[i+2UL] =
sum( xmm3 );
466 y[i+3UL] =
sum( xmm4 );
// Groups of three rows.
468 for( ; (i+3UL) <= M; i+=3UL ) {
470 for(
size_t j=0UL; j<N; j+=IT::size ) {
472 xmm1 = xmm1 + A.get(i ,j) * x1;
473 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
474 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
477 y[i+1UL] =
sum( xmm2 );
478 y[i+2UL] =
sum( xmm3 );
// Groups of two rows.
480 for( ; (i+2UL) <= M; i+=2UL ) {
482 for(
size_t j=0UL; j<N; j+=IT::size ) {
484 xmm1 = xmm1 + A.get(i ,j) * x1;
485 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
488 y[i+1UL] =
sum( xmm2 );
// Single-row accumulation: x.get(j) is used directly instead of a cached x1.
492 for(
size_t j=0UL; j<N; j+=IT::size ) {
493 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Fallback overload of the BLAS assignment kernel, selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2>. It forwards all arguments unchanged to
// selectDefaultAssignKernel.
515 template<
typename VT1
518 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
519 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
521 selectDefaultAssignKernel( y, A, x );
// Single precision BLAS assignment kernel, selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2>.
541 template<
typename VT1
544 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
545 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
547 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast;
// the spacing value is passed to the BLAS call as the leading dimension (lda).
553 const int M ( numeric_cast<int>( A.rows() ) );
554 const int N ( numeric_cast<int>( A.columns() ) );
555 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = 1.0F and beta = 0.0F, i.e. y = A * x.
557 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
558 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Double precision BLAS assignment kernel, selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2>.
579 template<
typename VT1
582 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
583 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
585 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast;
// the spacing value is passed to the BLAS call as the leading dimension (lda).
591 const int M ( numeric_cast<int>( A.rows() ) );
592 const int N ( numeric_cast<int>( A.columns() ) );
593 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = 1.0 and beta = 0.0, i.e. y = A * x.
595 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
596 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Single precision complex BLAS assignment kernel, selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
617 template<
typename VT1
620 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
621 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
623 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
632 const int M ( numeric_cast<int>( A.rows() ) );
633 const int N ( numeric_cast<int>( A.columns() ) );
634 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
635 const complex<float> alpha( 1.0F, 0.0F );
636 const complex<float> beta ( 0.0F, 0.0F );
// Row-major, non-transposed GEMV with alpha = 1 and beta = 0, i.e. y = A * x.
638 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
639 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS assignment kernel, selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
660 template<
typename VT1
663 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
664 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
666 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
675 const int M ( numeric_cast<int>( A.rows() ) );
676 const int N ( numeric_cast<int>( A.columns() ) );
677 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
678 const complex<double> alpha( 1.0, 0.0 );
679 const complex<double> beta ( 0.0, 0.0 );
// Row-major, non-transposed GEMV with alpha = 1 and beta = 0, i.e. y = A * x.
681 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
682 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
700 template<
typename VT1 >
729 template<
typename VT1 >
736 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
748 if( ( IsComputation<MT>::value && !evaluate ) ||
750 DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
752 DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
// Default addition assignment kernel, selected through DisableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2>. It hands the product expression A * x
// to y.addAssign().
771 template<
typename VT1
774 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
775 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
777 y.addAssign( A * x );
// Vectorized default addition assignment kernel, selected through EnableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2>.
// Rows of A are processed in groups of 8, 4, 3 and 2, followed by a loop whose
// header is not visible in this excerpt. Each row gets its own accumulator; the
// column index j advances by IT::size per step, and sum() of an accumulator is
// added to the matching element of y.
// NOTE(review): the declarations of i and x1 and the accumulators of the smaller
// groups sit on lines that are not part of this excerpt.
796 template<
typename VT1
799 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
800 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
802 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
804 const size_t M( A.rows() );
805 const size_t N( A.columns() );
// Groups of eight rows.
809 for( ; (i+8UL) <= M; i+=8UL ) {
810 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
811 for(
size_t j=0UL; j<N; j+=IT::size ) {
// The same x1 value is multiplied with the entries of all eight rows at column j.
813 xmm1 = xmm1 + A.get(i ,j) * x1;
814 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
815 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
816 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
817 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
818 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
819 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
820 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Add the reduced accumulators to the existing elements of y.
822 y[i ] +=
sum( xmm1 );
823 y[i+1UL] +=
sum( xmm2 );
824 y[i+2UL] +=
sum( xmm3 );
825 y[i+3UL] +=
sum( xmm4 );
826 y[i+4UL] +=
sum( xmm5 );
827 y[i+5UL] +=
sum( xmm6 );
828 y[i+6UL] +=
sum( xmm7 );
829 y[i+7UL] +=
sum( xmm8 );
// Groups of four rows.
831 for( ; (i+4UL) <= M; i+=4UL ) {
833 for(
size_t j=0UL; j<N; j+=IT::size ) {
835 xmm1 = xmm1 + A.get(i ,j) * x1;
836 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
837 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
838 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
840 y[i ] +=
sum( xmm1 );
841 y[i+1UL] +=
sum( xmm2 );
842 y[i+2UL] +=
sum( xmm3 );
843 y[i+3UL] +=
sum( xmm4 );
// Groups of three rows.
845 for( ; (i+3UL) <= M; i+=3UL ) {
847 for(
size_t j=0UL; j<N; j+=IT::size ) {
849 xmm1 = xmm1 + A.get(i ,j) * x1;
850 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
851 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
853 y[i ] +=
sum( xmm1 );
854 y[i+1UL] +=
sum( xmm2 );
855 y[i+2UL] +=
sum( xmm3 );
// Groups of two rows.
857 for( ; (i+2UL) <= M; i+=2UL ) {
859 for(
size_t j=0UL; j<N; j+=IT::size ) {
861 xmm1 = xmm1 + A.get(i ,j) * x1;
862 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
864 y[i ] +=
sum( xmm1 );
865 y[i+1UL] +=
sum( xmm2 );
// Single-row accumulation: x.get(j) is used directly instead of a cached x1.
869 for(
size_t j=0UL; j<N; j+=IT::size ) {
870 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Fallback overload of the BLAS addition assignment kernel, selected through
// EnableIf on UseDefaultKernel<VT1,MT1,VT2>. It forwards all arguments unchanged
// to selectDefaultAddAssignKernel.
892 template<
typename VT1
895 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
896 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
898 selectDefaultAddAssignKernel( y, A, x );
// Single precision BLAS addition assignment kernel, selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2>.
918 template<
typename VT1
921 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
922 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
924 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
930 const int M ( numeric_cast<int>( A.rows() ) );
931 const int N ( numeric_cast<int>( A.columns() ) );
932 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = 1.0F and beta = 1.0F, i.e. y = A * x + y.
934 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
935 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS addition assignment kernel, selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2>.
956 template<
typename VT1
959 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
960 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
962 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
968 const int M ( numeric_cast<int>( A.rows() ) );
969 const int N ( numeric_cast<int>( A.columns() ) );
970 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = 1.0 and beta = 1.0, i.e. y = A * x + y.
972 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
973 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS addition assignment kernel, selected through
// EnableIf on UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
994 template<
typename VT1
997 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
998 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1000 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1009 const int M ( numeric_cast<int>( A.rows() ) );
1010 const int N ( numeric_cast<int>( A.columns() ) );
1011 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
1012 const complex<float> alpha( 1.0F, 0.0F );
1013 const complex<float> beta ( 1.0F, 0.0F );
// Row-major, non-transposed GEMV with alpha = 1 and beta = 1, i.e. y = A * x + y.
1015 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1016 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS addition assignment kernel, selected through
// EnableIf on UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
1037 template<
typename VT1
1040 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1041 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1043 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1052 const int M ( numeric_cast<int>( A.rows() ) );
1053 const int N ( numeric_cast<int>( A.columns() ) );
1054 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
1055 const complex<double> alpha( 1.0, 0.0 );
1056 const complex<double> beta ( 1.0, 0.0 );
// Row-major, non-transposed GEMV with alpha = 1 and beta = 1, i.e. y = A * x + y.
1058 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1059 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1081 template<
typename VT1 >
1088 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1100 if( ( IsComputation<MT>::value && !evaluate ) ||
1102 DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1104 DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
// Default subtraction assignment kernel, selected through DisableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2>. It hands the product expression A * x
// to y.subAssign().
1123 template<
typename VT1
1126 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1127 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1129 y.subAssign( A * x );
// Vectorized default subtraction assignment kernel, selected through EnableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2>.
// Rows of A are processed in groups of 8, 4, 3 and 2, followed by a loop whose
// header is not visible in this excerpt. Each row gets its own accumulator; the
// column index j advances by IT::size per step, and sum() of an accumulator is
// subtracted from the matching element of y.
// NOTE(review): the declarations of i and x1 and the accumulators of the smaller
// groups sit on lines that are not part of this excerpt.
1148 template<
typename VT1
1151 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1152 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1154 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
1156 const size_t M( A.rows() );
1157 const size_t N( A.columns() );
// Groups of eight rows.
1161 for( ; (i+8UL) <= M; i+=8UL ) {
1162 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1163 for(
size_t j=0UL; j<N; j+=IT::size ) {
// The same x1 value is multiplied with the entries of all eight rows at column j.
1165 xmm1 = xmm1 + A.get(i ,j) * x1;
1166 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1167 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1168 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1169 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1170 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1171 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1172 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Subtract the reduced accumulators from the existing elements of y.
1174 y[i ] -=
sum( xmm1 );
1175 y[i+1UL] -=
sum( xmm2 );
1176 y[i+2UL] -=
sum( xmm3 );
1177 y[i+3UL] -=
sum( xmm4 );
1178 y[i+4UL] -=
sum( xmm5 );
1179 y[i+5UL] -=
sum( xmm6 );
1180 y[i+6UL] -=
sum( xmm7 );
1181 y[i+7UL] -=
sum( xmm8 );
// Groups of four rows.
1183 for( ; (i+4UL) <= M; i+=4UL ) {
1185 for(
size_t j=0UL; j<N; j+=IT::size ) {
1187 xmm1 = xmm1 + A.get(i ,j) * x1;
1188 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1189 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1190 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1192 y[i ] -=
sum( xmm1 );
1193 y[i+1UL] -=
sum( xmm2 );
1194 y[i+2UL] -=
sum( xmm3 );
1195 y[i+3UL] -=
sum( xmm4 );
// Groups of three rows.
1197 for( ; (i+3UL) <= M; i+=3UL ) {
1199 for(
size_t j=0UL; j<N; j+=IT::size ) {
1201 xmm1 = xmm1 + A.get(i ,j) * x1;
1202 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1203 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1205 y[i ] -=
sum( xmm1 );
1206 y[i+1UL] -=
sum( xmm2 );
1207 y[i+2UL] -=
sum( xmm3 );
// Groups of two rows.
1209 for( ; (i+2UL) <= M; i+=2UL ) {
1211 for(
size_t j=0UL; j<N; j+=IT::size ) {
1213 xmm1 = xmm1 + A.get(i ,j) * x1;
1214 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1216 y[i ] -=
sum( xmm1 );
1217 y[i+1UL] -=
sum( xmm2 );
// Single-row accumulation: x.get(j) is used directly instead of a cached x1.
1221 for(
size_t j=0UL; j<N; j+=IT::size ) {
1222 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1224 y[i] -=
sum( xmm1 );
// Fallback overload of the BLAS subtraction assignment kernel, selected through
// EnableIf on UseDefaultKernel<VT1,MT1,VT2>. It forwards all arguments unchanged
// to selectDefaultSubAssignKernel.
1244 template<
typename VT1
1247 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1248 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1250 selectDefaultSubAssignKernel( y, A, x );
// Single precision BLAS subtraction assignment kernel, selected through EnableIf
// on UseSinglePrecisionKernel<VT1,MT1,VT2>.
1270 template<
typename VT1
1273 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1274 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1276 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1282 const int M ( numeric_cast<int>( A.rows() ) );
1283 const int N ( numeric_cast<int>( A.columns() ) );
1284 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = -1.0F and beta = 1.0F, i.e. y = y - A * x.
1286 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1287 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS subtraction assignment kernel, selected through EnableIf
// on UseDoublePrecisionKernel<VT1,MT1,VT2>.
1308 template<
typename VT1
1311 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1312 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1314 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1320 const int M ( numeric_cast<int>( A.rows() ) );
1321 const int N ( numeric_cast<int>( A.columns() ) );
1322 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = -1.0 and beta = 1.0, i.e. y = y - A * x.
1324 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1325 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS subtraction assignment kernel, selected through
// EnableIf on UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
1346 template<
typename VT1
1349 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1350 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1352 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1361 const int M ( numeric_cast<int>( A.rows() ) );
1362 const int N ( numeric_cast<int>( A.columns() ) );
1363 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
1364 const complex<float> alpha( -1.0F, 0.0F );
1365 const complex<float> beta ( 1.0F, 0.0F );
// Row-major, non-transposed GEMV with alpha = -1 and beta = 1, i.e. y = y - A * x.
1367 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1368 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS subtraction assignment kernel, selected through
// EnableIf on UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
1389 template<
typename VT1
1392 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1393 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1395 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1404 const int M ( numeric_cast<int>( A.rows() ) );
1405 const int N ( numeric_cast<int>( A.columns() ) );
1406 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to the BLAS call by address.
1407 const complex<double> alpha( -1.0, 0.0 );
1408 const complex<double> beta ( 1.0, 0.0 );
// Row-major, non-transposed GEMV with alpha = -1 and beta = 1, i.e. y = y - A * x.
1410 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1411 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1433 template<
typename VT1 >
1482 template<
typename MT
1486 :
public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1487 ,
private Expression
1488 ,
private Computation
// Shorthand typedefs for the scaled matrix/vector product.
// MVM: the wrapped matrix/vector multiplication expression; RES: its result type.
1492 typedef DMatDVecMultExpr<MT,VT> MVM;
1493 typedef typename MVM::ResultType RES;
// MRT / VRT: result types of the matrix and vector operands.
1494 typedef typename MT::ResultType
MRT;
1495 typedef typename VT::ResultType
VRT;
// MET / VET: element types of those result types.
1496 typedef typename MRT::ElementType
MET;
1497 typedef typename VRT::ElementType
VET;
// MCT / VCT: composite types of the matrix and vector operands.
1498 typedef typename MT::CompositeType
MCT;
1499 typedef typename VT::CompositeType
VCT;
// Compile-time flag 'evaluate': set when MT is a computation that is not
// vectorizable, the vector and matrix element types are identical, and that
// element type is BLAS compatible.
1504 enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1505 IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
// Compile-time switch for the single precision kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable, the element
// type of each of them satisfies IsFloat, and T4 is not a complex type.
1513 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1514 struct UseSinglePrecisionKernel {
1515 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1516 IsFloat<typename T1::ElementType>::value &&
1517 IsFloat<typename T2::ElementType>::value &&
1518 IsFloat<typename T3::ElementType>::value &&
1519 !IsComplex<T4>::value };
// Compile-time switch for the double precision kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable, the element
// type of each of them satisfies IsDouble, and T4 is not a complex type.
1528 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1529 struct UseDoublePrecisionKernel {
1530 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1531 IsDouble<typename T1::ElementType>::value &&
1532 IsDouble<typename T2::ElementType>::value &&
1533 IsDouble<typename T3::ElementType>::value &&
1534 !IsComplex<T4>::value };
// Compile-time switch for the single precision complex kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable and the
// element type of each of them is exactly complex<float>.
1543 template<
typename T1,
typename T2,
typename T3 >
1544 struct UseSinglePrecisionComplexKernel {
1545 typedef complex<float> Type;
1546 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1547 IsSame<typename T1::ElementType,Type>::value &&
1548 IsSame<typename T2::ElementType,Type>::value &&
1549 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the double precision complex kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable and the
// element type of each of them is exactly complex<double>.
1558 template<
typename T1,
typename T2,
typename T3 >
1559 struct UseDoublePrecisionComplexKernel {
1560 typedef complex<double> Type;
1561 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1562 IsSame<typename T1::ElementType,Type>::value &&
1563 IsSame<typename T2::ElementType,Type>::value &&
1564 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the default (non-BLAS) kernel of the scaled product.
// 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific kernel switches applies. The real-valued switches also
// receive the scalar type T4; the complex ones only take T1, T2 and T3.
1572 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1573 struct UseDefaultKernel {
1574 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1575 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1576 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1577 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 are all vectorizable, share the same
// element type, the scalar type T4 equals that element type, and IntrinsicTrait
// of the element type reports both 'addition' and 'multiplication'.
1586 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1587 struct UseVectorizedDefaultKernel {
1588 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1589 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1590 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1591 IsSame<typename T1::ElementType,T4>::value &&
1592 IntrinsicTrait<typename T1::ElementType>::addition &&
1593 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression specialization.
1599 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// Result type, obtained from MultTrait for the product result type and the scalar type.
1600 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Element type of the result and the matching intrinsic type.
1602 typedef typename ResultType::ElementType
ElementType;
1603 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left-hand side operand is the wrapped matrix/vector product.
1608 typedef const DMatDVecMultExpr<MT,VT>
LeftOperand;
// LT: either const MRT or MCT, chosen by SelectType on the 'evaluate' flag.
// NOTE(review): presumably const MRT is picked when 'evaluate' is set - confirm
// against the SelectType definition.
1614 typedef typename SelectType< evaluate, const MRT, MCT >::Type
LT;
// RT: either const VRT or VCT, chosen by SelectType on IsComputation<VT>::value.
1617 typedef typename SelectType< IsComputation<VT>::value,
const VRT,
VCT >::Type
RT;
1622 enum { vectorizable = 0 };
1631 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
1645 return vector_[index] * scalar_;
// Returns the size reported by the wrapped vector expression.
1654 inline size_t size()
const {
1655 return vector_.size();
// Forwards the canAlias() query for the given address to the wrapped vector
// expression and returns its answer.
1685 template<
typename T >
1686 inline bool canAlias(
const T* alias )
const {
1687 return vector_.canAlias( alias );
// Forwards the isAliased() query for the given address to the wrapped vector
// expression and returns its answer.
1697 template<
typename T >
1698 inline bool isAliased(
const T* alias )
const {
1699 return vector_.isAliased( alias );
// Assignment of the scaled matrix/vector product to a dense vector.
// NOTE(review): several lines of this function (the handling of the empty cases,
// the creation of A and x, and the second half of the dispatch condition) are
// not part of this excerpt.
1721 template<
typename VT1 >
1722 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix and vector operands of the wrapped product.
1728 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1729 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows and a matrix without columns are handled separately.
1731 if( left.rows() == 0UL ) {
1734 else if( left.columns() == 0UL ) {
// Dispatch to either the default kernel or the BLAS kernel, passing the scalar along.
1747 if( ( IsComputation<MT>::value && !evaluate ) ||
1749 DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1751 DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Default assignment kernel of the scaled product, selected through DisableIf on
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. It hands the expression
// A * x * scalar to y.assign().
1769 template<
typename VT1
1773 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1774 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1776 y.assign( A * x * scalar );
// Vectorized default assignment kernel of the scaled product, selected through
// EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
// Rows of A are processed in groups of 8, 4, 3 and 2, followed by a loop whose
// header is not visible in this excerpt. Each row gets its own accumulator; the
// column index j advances by IT::size per step, and sum() of an accumulator,
// multiplied by scalar, is stored into the matching element of y.
// NOTE(review): the declarations of i and x1 and the accumulators of the smaller
// groups sit on lines that are not part of this excerpt.
1794 template<
typename VT1
1798 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1799 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1801 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
1803 const size_t M( A.rows() );
1804 const size_t N( A.columns() );
// Groups of eight rows.
1808 for( ; (i+8UL) <= M; i+=8UL ) {
1809 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1810 for(
size_t j=0UL; j<N; j+=IT::size ) {
// The same x1 value is multiplied with the entries of all eight rows at column j.
1812 xmm1 = xmm1 + A.get(i ,j) * x1;
1813 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1814 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1815 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1816 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1817 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1818 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1819 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// The scaling is applied once per row, after the reduction.
1821 y[i ] =
sum( xmm1 ) * scalar;
1822 y[i+1UL] =
sum( xmm2 ) * scalar;
1823 y[i+2UL] =
sum( xmm3 ) * scalar;
1824 y[i+3UL] =
sum( xmm4 ) * scalar;
1825 y[i+4UL] =
sum( xmm5 ) * scalar;
1826 y[i+5UL] =
sum( xmm6 ) * scalar;
1827 y[i+6UL] =
sum( xmm7 ) * scalar;
1828 y[i+7UL] =
sum( xmm8 ) * scalar;
// Groups of four rows.
1830 for( ; (i+4UL) <= M; i+=4UL ) {
1832 for(
size_t j=0UL; j<N; j+=IT::size ) {
1834 xmm1 = xmm1 + A.get(i ,j) * x1;
1835 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1836 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1837 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1839 y[i ] =
sum( xmm1 ) * scalar;
1840 y[i+1UL] =
sum( xmm2 ) * scalar;
1841 y[i+2UL] =
sum( xmm3 ) * scalar;
1842 y[i+3UL] =
sum( xmm4 ) * scalar;
// Groups of three rows.
1844 for( ; (i+3UL) <= M; i+=3UL ) {
1846 for(
size_t j=0UL; j<N; j+=IT::size ) {
1848 xmm1 = xmm1 + A.get(i ,j) * x1;
1849 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1850 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1852 y[i ] =
sum( xmm1 ) * scalar;
1853 y[i+1UL] =
sum( xmm2 ) * scalar;
1854 y[i+2UL] =
sum( xmm3 ) * scalar;
// Groups of two rows.
1856 for( ; (i+2UL) <= M; i+=2UL ) {
1858 for(
size_t j=0UL; j<N; j+=IT::size ) {
1860 xmm1 = xmm1 + A.get(i ,j) * x1;
1861 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1863 y[i ] =
sum( xmm1 ) * scalar;
1864 y[i+1UL] =
sum( xmm2 ) * scalar;
// Single-row accumulation: x.get(j) is used directly instead of a cached x1.
1868 for(
size_t j=0UL; j<N; j+=IT::size ) {
1869 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1871 y[i] =
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS assignment kernel of the scaled product, selected
// through EnableIf on UseDefaultKernel<VT1,MT1,VT2,ST2>. It forwards all
// arguments unchanged to selectDefaultAssignKernel.
1890 template<
typename VT1
1894 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1895 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1897 selectDefaultAssignKernel( y, A, x, scalar );
// Single precision BLAS assignment kernel of the scaled product, selected through
// EnableIf on UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>.
1916 template<
typename VT1
1920 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1921 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1923 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1929 const int M ( numeric_cast<int>( A.rows() ) );
1930 const int N ( numeric_cast<int>( A.columns() ) );
1931 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = scalar and beta = 0.0F,
// i.e. y = scalar * A * x.
1933 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1934 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Double precision BLAS assignment kernel of the scaled product, selected through
// EnableIf on UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>.
1954 template<
typename VT1
1958 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1959 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1961 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
1967 const int M ( numeric_cast<int>( A.rows() ) );
1968 const int N ( numeric_cast<int>( A.columns() ) );
1969 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = scalar and beta = 0.0,
// i.e. y = scalar * A * x.
1971 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1972 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Single precision complex BLAS assignment kernel of the scaled product, selected
// through EnableIf on UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
1992 template<
typename VT1
1996 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1997 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1999 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2009 const int M ( numeric_cast<int>( A.rows() ) );
2010 const int N ( numeric_cast<int>( A.columns() ) );
2011 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha is constructed from the scalar; both factors are handed over by address.
2012 const complex<float> alpha( scalar );
2013 const complex<float> beta ( 0.0F, 0.0F );
// Row-major, non-transposed GEMV with beta = 0, i.e. y = alpha * A * x.
2015 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2016 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS assignment kernel of the scaled product, selected
// through EnableIf on UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
2036 template<
typename VT1
2040 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2041 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2043 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2053 const int M ( numeric_cast<int>( A.rows() ) );
2054 const int N ( numeric_cast<int>( A.columns() ) );
2055 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha is constructed from the scalar; both factors are handed over by address.
2056 const complex<double> alpha( scalar );
2057 const complex<double> beta ( 0.0, 0.0 );
// Row-major, non-transposed GEMV with beta = 0, i.e. y = alpha * A * x.
2059 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2060 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Assignment of the scaled matrix/vector product to a sparse vector.
// NOTE(review): only the signature is part of this excerpt; the body is not visible.
2076 template<
typename VT1 >
2077 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of the scaled matrix/vector product to a dense vector.
// NOTE(review): several lines of this function (the handling of the empty case,
// the creation of A and x, and the second half of the dispatch condition) are
// not part of this excerpt.
2104 template<
typename VT1 >
2105 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix and vector operands of the wrapped product.
2111 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2112 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows or without columns is handled separately.
2114 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Dispatch to either the default kernel or the BLAS kernel, passing the scalar along.
2126 if( ( IsComputation<MT>::value && !evaluate ) ||
2128 DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2130 DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Default addition assignment kernel of the scaled product, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. It hands the
// expression A * x * scalar to y.addAssign().
2148 template<
typename VT1
2152 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2153 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2155 y.addAssign( A * x * scalar );
// Vectorized default addition assignment kernel of the scaled product, selected
// through EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
// Rows of A are processed in groups of 8, 4, 3 and 2, followed by a loop whose
// header is not visible in this excerpt. Each row gets its own accumulator; the
// column index j advances by IT::size per step, and sum() of an accumulator,
// multiplied by scalar, is added to the matching element of y.
// NOTE(review): the declarations of i and x1 and the accumulators of the smaller
// groups sit on lines that are not part of this excerpt.
2173 template<
typename VT1
2177 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2178 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2180 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
2182 const size_t M( A.rows() );
2183 const size_t N( A.columns() );
// Groups of eight rows.
2187 for( ; (i+8UL) <= M; i+=8UL ) {
2188 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2189 for(
size_t j=0UL; j<N; j+=IT::size ) {
// The same x1 value is multiplied with the entries of all eight rows at column j.
2191 xmm1 = xmm1 + A.get(i ,j) * x1;
2192 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2193 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2194 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2195 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2196 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2197 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2198 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// The scaling is applied once per row, after the reduction.
2200 y[i ] +=
sum( xmm1 ) * scalar;
2201 y[i+1UL] +=
sum( xmm2 ) * scalar;
2202 y[i+2UL] +=
sum( xmm3 ) * scalar;
2203 y[i+3UL] +=
sum( xmm4 ) * scalar;
2204 y[i+4UL] +=
sum( xmm5 ) * scalar;
2205 y[i+5UL] +=
sum( xmm6 ) * scalar;
2206 y[i+6UL] +=
sum( xmm7 ) * scalar;
2207 y[i+7UL] +=
sum( xmm8 ) * scalar;
// Groups of four rows.
2209 for( ; (i+4UL) <= M; i+=4UL ) {
2211 for(
size_t j=0UL; j<N; j+=IT::size ) {
2213 xmm1 = xmm1 + A.get(i ,j) * x1;
2214 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2215 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2216 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2218 y[i ] +=
sum( xmm1 ) * scalar;
2219 y[i+1UL] +=
sum( xmm2 ) * scalar;
2220 y[i+2UL] +=
sum( xmm3 ) * scalar;
2221 y[i+3UL] +=
sum( xmm4 ) * scalar;
// Groups of three rows.
2223 for( ; (i+3UL) <= M; i+=3UL ) {
2225 for(
size_t j=0UL; j<N; j+=IT::size ) {
2227 xmm1 = xmm1 + A.get(i ,j) * x1;
2228 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2229 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2231 y[i ] +=
sum( xmm1 ) * scalar;
2232 y[i+1UL] +=
sum( xmm2 ) * scalar;
2233 y[i+2UL] +=
sum( xmm3 ) * scalar;
// Groups of two rows.
2235 for( ; (i+2UL) <= M; i+=2UL ) {
2237 for(
size_t j=0UL; j<N; j+=IT::size ) {
2239 xmm1 = xmm1 + A.get(i ,j) * x1;
2240 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2242 y[i ] +=
sum( xmm1 ) * scalar;
2243 y[i+1UL] +=
sum( xmm2 ) * scalar;
// Single-row accumulation: x.get(j) is used directly instead of a cached x1.
2247 for(
size_t j=0UL; j<N; j+=IT::size ) {
2248 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2250 y[i] +=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS addition assignment kernel of the scaled product,
// selected through EnableIf on UseDefaultKernel<VT1,MT1,VT2,ST2>. It forwards all
// arguments unchanged to selectDefaultAddAssignKernel.
2269 template<
typename VT1
2273 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2274 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2276 selectDefaultAddAssignKernel( y, A, x, scalar );
// Single precision BLAS addition assignment kernel of the scaled product, selected
// through EnableIf on UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>.
2295 template<
typename VT1
2299 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2300 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2302 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2308 const int M ( numeric_cast<int>( A.rows() ) );
2309 const int N ( numeric_cast<int>( A.columns() ) );
2310 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = scalar and beta = 1.0F,
// i.e. y = scalar * A * x + y.
2312 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2313 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS addition assignment kernel of the scaled product, selected
// through EnableIf on UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>.
2333 template<
typename VT1
2337 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2338 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2340 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2346 const int M ( numeric_cast<int>( A.rows() ) );
2347 const int N ( numeric_cast<int>( A.columns() ) );
2348 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed GEMV with alpha = scalar and beta = 1.0,
// i.e. y = scalar * A * x + y.
2350 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2351 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS addition assignment kernel of the scaled product,
// selected through EnableIf on UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
2371 template<
typename VT1
2375 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2376 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2378 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2388 const int M ( numeric_cast<int>( A.rows() ) );
2389 const int N ( numeric_cast<int>( A.columns() ) );
2390 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha is constructed from the scalar; both factors are handed over by address.
2391 const complex<float> alpha( scalar );
2392 const complex<float> beta ( 1.0F, 0.0F );
// Row-major, non-transposed GEMV with beta = 1, i.e. y = alpha * A * x + y.
2394 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2395 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS addition assignment kernel of the scaled product,
// selected through EnableIf on UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
2415 template<
typename VT1
2419 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2420 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2422 using boost::numeric_cast;
// The row count, column count and spacing of A are converted to int with numeric_cast.
2432 const int M ( numeric_cast<int>( A.rows() ) );
2433 const int N ( numeric_cast<int>( A.columns() ) );
2434 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha is constructed from the scalar; both factors are handed over by address.
2435 const complex<double> alpha( scalar );
2436 const complex<double> beta ( 1.0, 0.0 );
// Row-major, non-transposed GEMV with beta = 1, i.e. y = alpha * A * x + y.
2438 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2439 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Subtraction assignment of the scaled matrix/vector product to a dense vector.
// NOTE(review): several lines of this function (the handling of the empty case,
// the creation of A and x, and the second half of the dispatch condition) are
// not part of this excerpt.
2460 template<
typename VT1 >
2461 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Fetch the matrix and vector operands of the wrapped product.
2467 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2468 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows or without columns is handled separately.
2470 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Dispatch to either the default kernel or the BLAS kernel, passing the scalar along.
2482 if( ( IsComputation<MT>::value && !evaluate ) ||
2484 DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2486 DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Default subtraction assignment kernel of the scaled product, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>. It hands the
// expression A * x * scalar to y.subAssign().
2504 template<
typename VT1
2508 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2509 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2511 y.subAssign( A * x * scalar );
// Vectorized default subtraction assignment kernel.
// Selected when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> is satisfied.
// For every row i of A it accumulates the packed products of the row with x,
// reduces the accumulator with sum(), scales the result and subtracts it
// from y[i].
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to each reduced row result
2529 template<
typename VT1
2533 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2534 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
// IT::size is the column step used by all inner loops below.
2536 typedef IntrinsicTrait<ElementType> IT;
2538 const size_t M( A.rows() );
2539 const size_t N( A.columns() );
// Rows are processed in blocks of 8, then 4, 3 and 2, followed by a
// single-row tail; the row index i carries over from one loop to the next.
// Within a block the packed vector chunk x1 is shared by all rows.
// NOTE(review): x1 is presumably x.get(j) (cf. the single-row tail) - confirm
// against its declaration.
// NOTE(review): the j loops step by IT::size while testing only j<N, which
// assumes the packed access at the last step stays within the row storage
// (e.g. padded rows) - confirm.
2543 for( ; (i+8UL) <= M; i+=8UL ) {
// One packed accumulator per row of the 8-row block.
2544 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2545 for(
size_t j=0UL; j<N; j+=IT::size ) {
2547 xmm1 = xmm1 + A.get(i ,j) * x1;
2548 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2549 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2550 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2551 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2552 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2553 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2554 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Reduce each accumulator, scale it and subtract it from the matching element.
2556 y[i ] -=
sum( xmm1 ) * scalar;
2557 y[i+1UL] -=
sum( xmm2 ) * scalar;
2558 y[i+2UL] -=
sum( xmm3 ) * scalar;
2559 y[i+3UL] -=
sum( xmm4 ) * scalar;
2560 y[i+4UL] -=
sum( xmm5 ) * scalar;
2561 y[i+5UL] -=
sum( xmm6 ) * scalar;
2562 y[i+6UL] -=
sum( xmm7 ) * scalar;
2563 y[i+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of 4 rows.
2565 for( ; (i+4UL) <= M; i+=4UL ) {
2567 for(
size_t j=0UL; j<N; j+=IT::size ) {
2569 xmm1 = xmm1 + A.get(i ,j) * x1;
2570 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2571 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2572 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2574 y[i ] -=
sum( xmm1 ) * scalar;
2575 y[i+1UL] -=
sum( xmm2 ) * scalar;
2576 y[i+2UL] -=
sum( xmm3 ) * scalar;
2577 y[i+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of 3 rows.
2579 for( ; (i+3UL) <= M; i+=3UL ) {
2581 for(
size_t j=0UL; j<N; j+=IT::size ) {
2583 xmm1 = xmm1 + A.get(i ,j) * x1;
2584 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2585 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2587 y[i ] -=
sum( xmm1 ) * scalar;
2588 y[i+1UL] -=
sum( xmm2 ) * scalar;
2589 y[i+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of 2 rows.
2591 for( ; (i+2UL) <= M; i+=2UL ) {
2593 for(
size_t j=0UL; j<N; j+=IT::size ) {
2595 xmm1 = xmm1 + A.get(i ,j) * x1;
2596 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2598 y[i ] -=
sum( xmm1 ) * scalar;
2599 y[i+1UL] -=
sum( xmm2 ) * scalar;
// Single-row tail: multiplies directly with x.get(j) instead of a shared x1.
2603 for(
size_t j=0UL; j<N; j+=IT::size ) {
2604 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2606 y[i] -=
sum( xmm1 ) * scalar;
// Fallback of the BLAS subtraction assignment kernel selection.
// Enabled when UseDefaultKernel<VT1,MT1,VT2,ST2> is satisfied; no BLAS routine
// is called and the work is forwarded to the default kernel.
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product A*x
2625 template<
typename VT1
2629 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2630 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2632 selectDefaultSubAssignKernel( y, A, x, scalar );
// BLAS-based subtraction assignment kernel for single precision operands.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> is satisfied.
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product A*x
2651 template<
typename VT1
2655 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2656 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2658 using boost::numeric_cast;
// Range-checked conversion of the matrix dimensions and row spacing to the
// int arguments expected by CBLAS.
2664 const int M ( numeric_cast<int>( A.rows() ) );
2665 const int N ( numeric_cast<int>( A.columns() ) );
2666 const int lda( numeric_cast<int>( A.spacing() ) );
// gemv computes y := alpha*A*x + beta*y; the subtraction is expressed by
// passing alpha = -scalar together with beta = 1.
2668 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2669 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision operands.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> is satisfied.
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product A*x
2689 template<
typename VT1
2693 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2694 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2696 using boost::numeric_cast;
// Range-checked conversion of the matrix dimensions and row spacing to the
// int arguments expected by CBLAS.
2702 const int M ( numeric_cast<int>( A.rows() ) );
2703 const int N ( numeric_cast<int>( A.columns() ) );
2704 const int lda( numeric_cast<int>( A.spacing() ) );
// gemv computes y := alpha*A*x + beta*y; alpha = -scalar and beta = 1
// subtract the scaled product from y.
2706 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2707 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS-based subtraction assignment kernel for single precision complex
// operands. Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> is
// satisfied.
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product A*x
2727 template<
typename VT1
2731 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2732 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2734 using boost::numeric_cast;
// Range-checked conversion of the matrix dimensions and row spacing to the
// int arguments expected by CBLAS.
2744 const int M ( numeric_cast<int>( A.rows() ) );
2745 const int N ( numeric_cast<int>( A.columns() ) );
2746 const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scalar as alpha plus beta = 1 turns gemv's
// y := alpha*A*x + beta*y into a subtraction of the scaled product.
2747 const complex<float> alpha( -scalar );
2748 const complex<float> beta ( 1.0F, 0.0F );
// The complex gemv variants receive alpha and beta by address.
2750 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2751 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision complex
// operands. Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> is
// satisfied.
//   y      : target vector, updated in place
//   A      : left-hand side matrix operand
//   x      : right-hand side vector operand
//   scalar : scaling factor applied to the product A*x
2771 template<
typename VT1
2775 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2776 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2778 using boost::numeric_cast;
// Range-checked conversion of the matrix dimensions and row spacing to the
// int arguments expected by CBLAS.
2788 const int M ( numeric_cast<int>( A.rows() ) );
2789 const int N ( numeric_cast<int>( A.columns() ) );
2790 const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scalar as alpha plus beta = 1 turns gemv's
// y := alpha*A*x + beta*y into a subtraction of the scaled product.
2791 const complex<double> alpha( -scalar );
2792 const complex<double> beta ( 1.0, 0.0 );
2794 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2795 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Multiplication assignment of a scaled dense matrix-dense vector
// multiplication to a dense vector.
//   lhs : target dense vector
//   rhs : scaled multiplication expression to be multiplied into lhs
2816 template<
typename VT1 >
2817 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Free function template that yields a DMatDVecMultExpr<T1,T2>.
// DisableIf removes it from overload resolution when T1 is itself a
// matrix-matrix multiplication expression (IsMatMatMultExpr<T1>).
// NOTE(review): presumably the matrix * vector multiplication operator -
// confirm against the declarator.
// Throws std::invalid_argument to report mismatching operand sizes.
2888 template<
typename T1
2890 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2896 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
// Counterpart overload, enabled only when T1 is a matrix-matrix
// multiplication expression (IsMatMatMultExpr<T1>); the return type is taken
// from MultExprTrait<T1,T2>.
// The product is reassociated: the right matrix operand is multiplied with
// the vector first, and the left matrix operand is applied to that result.
2924 template<
typename T1
2927 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2932 return (~mat).leftOperand() * ( (~mat).
rightOperand() * vec );