22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
31 #include <boost/cast.hpp>
// Shorthand aliases for the nested types of the two operand types MT and VT.
// MT and VT are declared on lines not visible here (presumably the matrix and
// vector template parameters of the enclosing expression class -- confirm).
// MRT/VRT: ResultType of MT/VT; MET/VET: ElementType of those result types;
// MCT/VCT: CompositeType of MT/VT.
94 typedef typename MT::ResultType
MRT;
95 typedef typename VT::ResultType
VRT;
96 typedef typename MRT::ElementType
MET;
97 typedef typename VRT::ElementType
VET;
98 typedef typename MT::CompositeType
MCT;
99 typedef typename VT::CompositeType
VCT;
// Compile-time predicate over three types T1, T2, T3.
// The visible part of the condition requires all three types to be
// vectorizable; the remaining terms of the expression continue on lines that
// are not visible here (presumably float element-type checks, by analogy with
// UseDoublePrecisionKernel -- confirm).
114 template<
typename T1,
typename T2,
typename T3 >
115 struct UseSinglePrecisionKernel {
116 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable and the
// ElementType of each of them satisfies IsDouble.
130 template<
typename T1,
typename T2,
typename T3 >
131 struct UseDoublePrecisionKernel {
132 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
133 IsDouble<typename T1::ElementType>::value &&
134 IsDouble<typename T2::ElementType>::value &&
135 IsDouble<typename T3::ElementType>::value };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable and the
// ElementType of each of them is exactly complex<float>.
146 template<
typename T1,
typename T2,
typename T3 >
147 struct UseSinglePrecisionComplexKernel {
// Element type every operand must have for this predicate to hold.
148 typedef complex<float> Type;
149 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
150 IsSame<typename T1::ElementType,Type>::value &&
151 IsSame<typename T2::ElementType,Type>::value &&
152 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable and the
// ElementType of each of them is exactly complex<double>.
163 template<
typename T1,
typename T2,
typename T3 >
164 struct UseDoublePrecisionComplexKernel {
// Element type every operand must have for this predicate to hold.
165 typedef complex<double> Type;
166 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167 IsSame<typename T1::ElementType,Type>::value &&
168 IsSame<typename T2::ElementType,Type>::value &&
169 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific predicates (single, double, single complex, double
// complex) holds for <T1,T2,T3>.
179 template<
typename T1,
typename T2,
typename T3 >
180 struct UseDefaultKernel {
181 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
182 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
183 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
184 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable, T2 and T3 have
// the same ElementType as T1, and IntrinsicTrait of that element type reports
// both `addition` and `multiplication`.
195 template<
typename T1,
typename T2,
typename T3 >
196 struct UseVectorizedDefaultKernel {
197 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
198 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
199 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
200 IntrinsicTrait<typename T1::ElementType>::addition &&
201 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compile-time flag: `vectorizable` is 0 in the scope this enum belongs to
// (the enclosing class header is not visible here).
231 enum { vectorizable = 0 };
260 if(
mat_.columns() != 0UL ) {
262 for(
size_t j=1UL; j<
end_; j+=2UL ) {
265 if( end_ <
mat_.columns() ) {
313 template<
typename T >
315 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
325 template<
typename T >
327 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
350 template<
typename VT1 >
357 if( rhs.
mat_.rows() == 0UL ) {
360 else if( rhs.
mat_.columns() == 0UL ) {
375 DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
377 DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
// Overload of selectDefaultAssignKernel taking the target vector y, the
// matrix operand A and the vector operand x.
// NOTE(review): the remaining template parameters, the return type / enabling
// condition and the body are on lines not visible here.
396 template<
typename VT1
400 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// Vectorized overload of selectDefaultAssignKernel, selected through EnableIf
// on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
//   y  target vector, written per row index
//   A  matrix operand, read through A.get(row,column)
//   x  vector operand, read through x.get(column)
// Rows are processed in groups of 8, 4, 3 and 2, followed by a single-row
// loop. For each group one accumulator per row collects A.get(row,j) * x-chunk
// over j = 0, IT::size, 2*IT::size, ... < N, and sum( accumulator ) is stored
// into the matching element of y.
// NOTE(review): the declarations of `i` and `x1`, the accumulator
// declarations of the smaller groups and the `y[i] = ...` stores are on lines
// not visible here. `sum` presumably reduces the lanes of an accumulator to a
// scalar -- confirm.
// NOTE(review): no remainder handling for N not being a multiple of IT::size
// is visible; presumably rows are padded -- confirm.
421 template<
typename VT1
424 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
425 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
427 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
429 const size_t M( A.rows() );
430 const size_t N( A.columns() );
// Groups of eight rows.
434 for( ; (i+8UL) <= M; i+=8UL ) {
435 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
436 for(
size_t j=0UL; j<N; j+=IT::size ) {
438 xmm1 = xmm1 + A.get(i ,j) * x1;
439 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
440 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
441 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
442 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
443 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
444 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
445 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
448 y[i+1UL] =
sum( xmm2 );
449 y[i+2UL] =
sum( xmm3 );
450 y[i+3UL] =
sum( xmm4 );
451 y[i+4UL] =
sum( xmm5 );
452 y[i+5UL] =
sum( xmm6 );
453 y[i+6UL] =
sum( xmm7 );
454 y[i+7UL] =
sum( xmm8 );
// Groups of four rows.
456 for( ; (i+4UL) <= M; i+=4UL ) {
458 for(
size_t j=0UL; j<N; j+=IT::size ) {
460 xmm1 = xmm1 + A.get(i ,j) * x1;
461 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
462 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
463 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
466 y[i+1UL] =
sum( xmm2 );
467 y[i+2UL] =
sum( xmm3 );
468 y[i+3UL] =
sum( xmm4 );
// Groups of three rows.
470 for( ; (i+3UL) <= M; i+=3UL ) {
472 for(
size_t j=0UL; j<N; j+=IT::size ) {
474 xmm1 = xmm1 + A.get(i ,j) * x1;
475 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
476 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
479 y[i+1UL] =
sum( xmm2 );
480 y[i+2UL] =
sum( xmm3 );
// Groups of two rows.
482 for( ; (i+2UL) <= M; i+=2UL ) {
484 for(
size_t j=0UL; j<N; j+=IT::size ) {
486 xmm1 = xmm1 + A.get(i ,j) * x1;
487 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
490 y[i+1UL] =
sum( xmm2 );
// Single remaining row: x is read directly instead of through x1.
494 for(
size_t j=0UL; j<N; j+=IT::size ) {
495 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2>: it performs no BLAS call and simply forwards
// y, A and x to selectDefaultAssignKernel.
517 template<
typename VT1
520 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
521 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
523 selectDefaultAssignKernel( y, A, x );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_sgemv (row-major, no transpose) with alpha = 1 and beta = 0,
// i.e. per the CBLAS gemv contract y is overwritten with A * x.
543 template<
typename VT1
546 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
547 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
549 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
555 const int M ( numeric_cast<int>( A.rows() ) );
556 const int N ( numeric_cast<int>( A.columns() ) );
557 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
559 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
560 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_dgemv (row-major, no transpose) with alpha = 1 and beta = 0,
// i.e. per the CBLAS gemv contract y is overwritten with A * x.
581 template<
typename VT1
584 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
585 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
587 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
593 const int M ( numeric_cast<int>( A.rows() ) );
594 const int N ( numeric_cast<int>( A.columns() ) );
595 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
597 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
598 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_cgemv (row-major, no transpose) with alpha = (1,0) and
// beta = (0,0), i.e. per the CBLAS gemv contract y is overwritten with A * x.
619 template<
typename VT1
622 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
623 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
625 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
634 const int M ( numeric_cast<int>( A.rows() ) );
635 const int N ( numeric_cast<int>( A.columns() ) );
636 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
637 const complex<float> alpha( 1.0F, 0.0F );
638 const complex<float> beta ( 0.0F, 0.0F );
640 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
641 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_zgemv (row-major, no transpose) with alpha = (1,0) and
// beta = (0,0), i.e. per the CBLAS gemv contract y is overwritten with A * x.
662 template<
typename VT1
665 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
666 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
668 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
677 const int M ( numeric_cast<int>( A.rows() ) );
678 const int N ( numeric_cast<int>( A.columns() ) );
679 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
680 const complex<double> alpha( 1.0, 0.0 );
681 const complex<double> beta ( 0.0, 0.0 );
683 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
684 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
702 template<
typename VT1 >
731 template<
typename VT1 >
738 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
750 if( ( IsComputation<MT>::value && !evaluate ) ||
752 DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
754 DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
// Non-vectorized overload of selectDefaultAddAssignKernel, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
// Hands the expression A * x to y.addAssign().
773 template<
typename VT1
776 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
777 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
779 y.addAssign( A * x );
// Vectorized overload of selectDefaultAddAssignKernel, selected through
// EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
//   y  target vector, updated in place with +=
//   A  matrix operand, read through A.get(row,column)
//   x  vector operand, read through x.get(column)
// Rows are processed in groups of 8, 4, 3 and 2, followed by a single-row
// loop. For each group one accumulator per row collects A.get(row,j) * x-chunk
// over j = 0, IT::size, 2*IT::size, ... < N, and sum( accumulator ) is added
// to the matching element of y.
// NOTE(review): the declarations of `i` and `x1`, the accumulator
// declarations of the smaller groups and the final single-row update are on
// lines not visible here. `sum` presumably reduces the lanes of an accumulator
// to a scalar -- confirm.
// NOTE(review): no remainder handling for N not being a multiple of IT::size
// is visible; presumably rows are padded -- confirm.
798 template<
typename VT1
801 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
802 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
804 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
806 const size_t M( A.rows() );
807 const size_t N( A.columns() );
// Groups of eight rows.
811 for( ; (i+8UL) <= M; i+=8UL ) {
812 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
813 for(
size_t j=0UL; j<N; j+=IT::size ) {
815 xmm1 = xmm1 + A.get(i ,j) * x1;
816 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
817 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
818 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
819 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
820 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
821 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
822 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
824 y[i ] +=
sum( xmm1 );
825 y[i+1UL] +=
sum( xmm2 );
826 y[i+2UL] +=
sum( xmm3 );
827 y[i+3UL] +=
sum( xmm4 );
828 y[i+4UL] +=
sum( xmm5 );
829 y[i+5UL] +=
sum( xmm6 );
830 y[i+6UL] +=
sum( xmm7 );
831 y[i+7UL] +=
sum( xmm8 );
// Groups of four rows.
833 for( ; (i+4UL) <= M; i+=4UL ) {
835 for(
size_t j=0UL; j<N; j+=IT::size ) {
837 xmm1 = xmm1 + A.get(i ,j) * x1;
838 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
839 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
840 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
842 y[i ] +=
sum( xmm1 );
843 y[i+1UL] +=
sum( xmm2 );
844 y[i+2UL] +=
sum( xmm3 );
845 y[i+3UL] +=
sum( xmm4 );
// Groups of three rows.
847 for( ; (i+3UL) <= M; i+=3UL ) {
849 for(
size_t j=0UL; j<N; j+=IT::size ) {
851 xmm1 = xmm1 + A.get(i ,j) * x1;
852 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
853 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
855 y[i ] +=
sum( xmm1 );
856 y[i+1UL] +=
sum( xmm2 );
857 y[i+2UL] +=
sum( xmm3 );
// Groups of two rows.
859 for( ; (i+2UL) <= M; i+=2UL ) {
861 for(
size_t j=0UL; j<N; j+=IT::size ) {
863 xmm1 = xmm1 + A.get(i ,j) * x1;
864 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
866 y[i ] +=
sum( xmm1 );
867 y[i+1UL] +=
sum( xmm2 );
// Single remaining row: x is read directly instead of through x1.
871 for(
size_t j=0UL; j<N; j+=IT::size ) {
872 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2>: it performs no BLAS call and simply forwards
// y, A and x to selectDefaultAddAssignKernel.
894 template<
typename VT1
897 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
898 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
900 selectDefaultAddAssignKernel( y, A, x );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_sgemv (row-major, no transpose) with alpha = 1 and beta = 1,
// i.e. per the CBLAS gemv contract A * x is added to the existing y.
920 template<
typename VT1
923 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
924 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
926 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
932 const int M ( numeric_cast<int>( A.rows() ) );
933 const int N ( numeric_cast<int>( A.columns() ) );
934 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
936 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
937 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_dgemv (row-major, no transpose) with alpha = 1 and beta = 1,
// i.e. per the CBLAS gemv contract A * x is added to the existing y.
958 template<
typename VT1
961 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
962 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
964 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
970 const int M ( numeric_cast<int>( A.rows() ) );
971 const int N ( numeric_cast<int>( A.columns() ) );
972 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
974 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
975 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_cgemv (row-major, no transpose) with alpha = (1,0) and
// beta = (1,0), i.e. per the CBLAS gemv contract A * x is added to y.
996 template<
typename VT1
999 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1000 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1002 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1011 const int M ( numeric_cast<int>( A.rows() ) );
1012 const int N ( numeric_cast<int>( A.columns() ) );
1013 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
1014 const complex<float> alpha( 1.0F, 0.0F );
1015 const complex<float> beta ( 1.0F, 0.0F );
1017 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1018 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_zgemv (row-major, no transpose) with alpha = (1,0) and
// beta = (1,0), i.e. per the CBLAS gemv contract A * x is added to y.
1039 template<
typename VT1
1042 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1043 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1045 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1054 const int M ( numeric_cast<int>( A.rows() ) );
1055 const int N ( numeric_cast<int>( A.columns() ) );
1056 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
1057 const complex<double> alpha( 1.0, 0.0 );
1058 const complex<double> beta ( 1.0, 0.0 );
1060 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1061 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1083 template<
typename VT1 >
1090 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1102 if( ( IsComputation<MT>::value && !evaluate ) ||
1104 DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1106 DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
// Non-vectorized overload of selectDefaultSubAssignKernel, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
// Hands the expression A * x to y.subAssign().
1125 template<
typename VT1
1128 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1129 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1131 y.subAssign( A * x );
// Vectorized overload of selectDefaultSubAssignKernel, selected through
// EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2>.
//   y  target vector, updated in place with -=
//   A  matrix operand, read through A.get(row,column)
//   x  vector operand, read through x.get(column)
// Rows are processed in groups of 8, 4, 3 and 2, followed by a single-row
// loop. For each group one accumulator per row collects A.get(row,j) * x-chunk
// over j = 0, IT::size, 2*IT::size, ... < N, and sum( accumulator ) is
// subtracted from the matching element of y.
// NOTE(review): the declarations of `i` and `x1` and the accumulator
// declarations of the smaller groups are on lines not visible here. `sum`
// presumably reduces the lanes of an accumulator to a scalar -- confirm.
// NOTE(review): no remainder handling for N not being a multiple of IT::size
// is visible; presumably rows are padded -- confirm.
1150 template<
typename VT1
1153 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1154 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1156 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
1158 const size_t M( A.rows() );
1159 const size_t N( A.columns() );
// Groups of eight rows.
1163 for( ; (i+8UL) <= M; i+=8UL ) {
1164 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1165 for(
size_t j=0UL; j<N; j+=IT::size ) {
1167 xmm1 = xmm1 + A.get(i ,j) * x1;
1168 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1169 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1170 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1171 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1172 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1173 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1174 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1176 y[i ] -=
sum( xmm1 );
1177 y[i+1UL] -=
sum( xmm2 );
1178 y[i+2UL] -=
sum( xmm3 );
1179 y[i+3UL] -=
sum( xmm4 );
1180 y[i+4UL] -=
sum( xmm5 );
1181 y[i+5UL] -=
sum( xmm6 );
1182 y[i+6UL] -=
sum( xmm7 );
1183 y[i+7UL] -=
sum( xmm8 );
// Groups of four rows.
1185 for( ; (i+4UL) <= M; i+=4UL ) {
1187 for(
size_t j=0UL; j<N; j+=IT::size ) {
1189 xmm1 = xmm1 + A.get(i ,j) * x1;
1190 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1191 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1192 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1194 y[i ] -=
sum( xmm1 );
1195 y[i+1UL] -=
sum( xmm2 );
1196 y[i+2UL] -=
sum( xmm3 );
1197 y[i+3UL] -=
sum( xmm4 );
// Groups of three rows.
1199 for( ; (i+3UL) <= M; i+=3UL ) {
1201 for(
size_t j=0UL; j<N; j+=IT::size ) {
1203 xmm1 = xmm1 + A.get(i ,j) * x1;
1204 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1205 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1207 y[i ] -=
sum( xmm1 );
1208 y[i+1UL] -=
sum( xmm2 );
1209 y[i+2UL] -=
sum( xmm3 );
// Groups of two rows.
1211 for( ; (i+2UL) <= M; i+=2UL ) {
1213 for(
size_t j=0UL; j<N; j+=IT::size ) {
1215 xmm1 = xmm1 + A.get(i ,j) * x1;
1216 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1218 y[i ] -=
sum( xmm1 );
1219 y[i+1UL] -=
sum( xmm2 );
// Single remaining row: x is read directly instead of through x1.
1223 for(
size_t j=0UL; j<N; j+=IT::size ) {
1224 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1226 y[i] -=
sum( xmm1 );
// Overload of selectBlasSubAssignKernel selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2>: it performs no BLAS call and simply forwards
// y, A and x to selectDefaultSubAssignKernel.
1246 template<
typename VT1
1249 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1250 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1252 selectDefaultSubAssignKernel( y, A, x );
// Overload of selectBlasSubAssignKernel selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_sgemv (row-major, no transpose) with alpha = -1 and beta = 1,
// i.e. per the CBLAS gemv contract A * x is subtracted from the existing y.
1272 template<
typename VT1
1275 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1276 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1278 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1284 const int M ( numeric_cast<int>( A.rows() ) );
1285 const int N ( numeric_cast<int>( A.columns() ) );
1286 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
1288 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1289 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Overload of selectBlasSubAssignKernel selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2>.
// Calls cblas_dgemv (row-major, no transpose) with alpha = -1 and beta = 1,
// i.e. per the CBLAS gemv contract A * x is subtracted from the existing y.
1310 template<
typename VT1
1313 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1314 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1316 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1322 const int M ( numeric_cast<int>( A.rows() ) );
1323 const int N ( numeric_cast<int>( A.columns() ) );
1324 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
1326 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1327 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Overload of selectBlasSubAssignKernel selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_cgemv (row-major, no transpose) with alpha = (-1,0) and
// beta = (1,0), i.e. per the CBLAS gemv contract A * x is subtracted from y.
1348 template<
typename VT1
1351 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1352 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1354 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1363 const int M ( numeric_cast<int>( A.rows() ) );
1364 const int N ( numeric_cast<int>( A.columns() ) );
1365 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
1366 const complex<float> alpha( -1.0F, 0.0F );
1367 const complex<float> beta ( 1.0F, 0.0F );
1369 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1370 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of selectBlasSubAssignKernel selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_zgemv (row-major, no transpose) with alpha = (-1,0) and
// beta = (1,0), i.e. per the CBLAS gemv contract A * x is subtracted from y.
1391 template<
typename VT1
1394 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1395 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1397 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1406 const int M ( numeric_cast<int>( A.rows() ) );
1407 const int N ( numeric_cast<int>( A.columns() ) );
1408 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex scaling factors are handed to CBLAS by address.
1409 const complex<double> alpha( -1.0, 0.0 );
1410 const complex<double> beta ( 1.0, 0.0 );
1412 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1413 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1435 template<
typename VT1 >
1484 template<
typename MT
1488 :
public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1489 ,
private VecScalarMultExpr
1490 ,
private Computation
// Shorthand aliases used by the scaled matrix/vector product expression.
// MVM: the wrapped matrix/vector multiplication expression type.
// RES: ResultType of MVM.
// MRT/VRT: ResultType of MT/VT; MET/VET: ElementType of those result types;
// MCT/VCT: CompositeType of MT/VT.
1494 typedef DMatDVecMultExpr<MT,VT> MVM;
1495 typedef typename MVM::ResultType RES;
1496 typedef typename MT::ResultType
MRT;
1497 typedef typename VT::ResultType
VRT;
1498 typedef typename MRT::ElementType
MET;
1499 typedef typename VRT::ElementType
VET;
1500 typedef typename MT::CompositeType
MCT;
1501 typedef typename VT::CompositeType
VCT;
// Compile-time flag: `evaluate` is nonzero only if MT is a computation
// (IsComputation), MT is not vectorizable, the vector and matrix element
// types VET and MET are the same type, and VET satisfies IsBlasCompatible.
1506 enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1507 IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
// Compile-time predicate over three operand types T1, T2, T3 and a fourth
// type T4.
// value is nonzero only if T1, T2 and T3 are vectorizable, each of their
// ElementTypes satisfies IsFloat, and T4 does not satisfy IsComplex.
1515 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1516 struct UseSinglePrecisionKernel {
1517 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1518 IsFloat<typename T1::ElementType>::value &&
1519 IsFloat<typename T2::ElementType>::value &&
1520 IsFloat<typename T3::ElementType>::value &&
1521 !IsComplex<T4>::value };
// Compile-time predicate over three operand types T1, T2, T3 and a fourth
// type T4.
// value is nonzero only if T1, T2 and T3 are vectorizable, each of their
// ElementTypes satisfies IsDouble, and T4 does not satisfy IsComplex.
1530 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1531 struct UseDoublePrecisionKernel {
1532 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1533 IsDouble<typename T1::ElementType>::value &&
1534 IsDouble<typename T2::ElementType>::value &&
1535 IsDouble<typename T3::ElementType>::value &&
1536 !IsComplex<T4>::value };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable and the
// ElementType of each of them is exactly complex<float>.
1545 template<
typename T1,
typename T2,
typename T3 >
1546 struct UseSinglePrecisionComplexKernel {
// Element type every operand must have for this predicate to hold.
1547 typedef complex<float> Type;
1548 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1549 IsSame<typename T1::ElementType,Type>::value &&
1550 IsSame<typename T2::ElementType,Type>::value &&
1551 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate over three types T1, T2, T3.
// value is nonzero only if all three types are vectorizable and the
// ElementType of each of them is exactly complex<double>.
1560 template<
typename T1,
typename T2,
typename T3 >
1561 struct UseDoublePrecisionComplexKernel {
// Element type every operand must have for this predicate to hold.
1562 typedef complex<double> Type;
1563 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1564 IsSame<typename T1::ElementType,Type>::value &&
1565 IsSame<typename T2::ElementType,Type>::value &&
1566 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate over three operand types T1, T2, T3 and a fourth
// type T4.
// value is nonzero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific predicates holds. T4 is only forwarded to the
// single- and double-precision predicates; the two complex predicates take
// <T1,T2,T3>.
1574 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1575 struct UseDefaultKernel {
1576 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1577 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1578 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1579 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate over three operand types T1, T2, T3 and a fourth
// type T4.
// value is nonzero only if T1, T2 and T3 are vectorizable, T2 and T3 have the
// same ElementType as T1, T4 is that same element type, and IntrinsicTrait of
// the element type reports both `addition` and `multiplication`.
1588 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1589 struct UseVectorizedDefaultKernel {
1590 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1591 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1592 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1593 IsSame<typename T1::ElementType,T4>::value &&
1594 IntrinsicTrait<typename T1::ElementType>::addition &&
1595 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type aliases of the scaled matrix/vector product expression.
// This: the DVecScalarMultExpr instantiation over MVM and ST.
// ResultType: MultTrait of the product's result type RES and the scalar ST.
// ElementType / IntrinsicType: element type of ResultType and the type that
// IntrinsicTrait associates with it.
// LeftOperand: the (const) wrapped matrix/vector product type.
// LT / RT: types chosen by SelectType for the matrix and vector operands,
// keyed on `evaluate` and on IsComputation<VT> respectively.
// NOTE(review): presumably SelectType yields its second argument when the
// condition is true and the third otherwise -- confirm against SelectType.
1601 typedef DVecScalarMultExpr<MVM,ST,false>
This;
1602 typedef typename MultTrait<RES,ST>::Type
ResultType;
1604 typedef typename ResultType::ElementType
ElementType;
1605 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
1610 typedef const DMatDVecMultExpr<MT,VT>
LeftOperand;
1616 typedef typename SelectType< evaluate, const MRT, MCT >::Type
LT;
1619 typedef typename SelectType< IsComputation<VT>::value,
const VRT,
VCT >::Type
RT;
// Compile-time flag: `vectorizable` is 0 for this expression type.
1624 enum { vectorizable = 0 };
1633 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
1647 return vector_[index] * scalar_;
// Returns the size reported by the wrapped expression vector_.
1656 inline size_t size()
const {
1657 return vector_.size();
// Forwards the query to the wrapped expression: returns
// vector_.canAlias( alias ) for the given address.
1687 template<
typename T >
1688 inline bool canAlias(
const T* alias )
const {
1689 return vector_.canAlias( alias );
// Forwards the query to the wrapped expression: returns
// vector_.isAliased( alias ) for the given address.
1699 template<
typename T >
1700 inline bool isAliased(
const T* alias )
const {
1701 return vector_.isAliased( alias );
// Assignment of a scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled product expression supplying the operands and the scalar
// Takes the two operands of the wrapped product, special-cases a matrix with
// zero rows and one with zero columns, and otherwise dispatches to either
// selectDefaultAssignKernel or selectBlasAssignKernel with rhs.scalar_.
// NOTE(review): the bodies of the two empty-matrix branches, the definitions
// of A and x, and the rest of the dispatch condition are on lines not visible
// here.
1723 template<
typename VT1 >
1724 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Operands of the wrapped matrix/vector product.
1730 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1731 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1733 if( left.rows() == 0UL ) {
1736 else if( left.columns() == 0UL ) {
// Kernel selection; only the first term of the condition is visible.
1749 if( ( IsComputation<MT>::value && !evaluate ) ||
1751 DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1753 DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized overload of selectDefaultAssignKernel, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
// Hands the expression A * x * scalar to y.assign().
1771 template<
typename VT1
1775 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1776 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1778 y.assign( A * x * scalar );
// Vectorized overload of selectDefaultAssignKernel, selected through EnableIf
// on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
//   y       target vector, written per row index
//   A       matrix operand, read through A.get(row,column)
//   x       vector operand, read through x.get(column)
//   scalar  factor applied to every reduced row sum
// Rows are processed in groups of 8, 4, 3 and 2, followed by a single-row
// loop. For each group one accumulator per row collects A.get(row,j) * x-chunk
// over j = 0, IT::size, 2*IT::size, ... < N, and sum( accumulator ) * scalar
// is stored into the matching element of y.
// NOTE(review): the declarations of `i` and `x1` and the accumulator
// declarations of the smaller groups are on lines not visible here. `sum`
// presumably reduces the lanes of an accumulator to a scalar -- confirm.
// NOTE(review): no remainder handling for N not being a multiple of IT::size
// is visible; presumably rows are padded -- confirm.
1796 template<
typename VT1
1800 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1801 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1803 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
1805 const size_t M( A.rows() );
1806 const size_t N( A.columns() );
// Groups of eight rows.
1810 for( ; (i+8UL) <= M; i+=8UL ) {
1811 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1812 for(
size_t j=0UL; j<N; j+=IT::size ) {
1814 xmm1 = xmm1 + A.get(i ,j) * x1;
1815 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1816 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1817 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1818 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1819 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1820 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1821 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
1823 y[i ] =
sum( xmm1 ) * scalar;
1824 y[i+1UL] =
sum( xmm2 ) * scalar;
1825 y[i+2UL] =
sum( xmm3 ) * scalar;
1826 y[i+3UL] =
sum( xmm4 ) * scalar;
1827 y[i+4UL] =
sum( xmm5 ) * scalar;
1828 y[i+5UL] =
sum( xmm6 ) * scalar;
1829 y[i+6UL] =
sum( xmm7 ) * scalar;
1830 y[i+7UL] =
sum( xmm8 ) * scalar;
// Groups of four rows.
1832 for( ; (i+4UL) <= M; i+=4UL ) {
1834 for(
size_t j=0UL; j<N; j+=IT::size ) {
1836 xmm1 = xmm1 + A.get(i ,j) * x1;
1837 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1838 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1839 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1841 y[i ] =
sum( xmm1 ) * scalar;
1842 y[i+1UL] =
sum( xmm2 ) * scalar;
1843 y[i+2UL] =
sum( xmm3 ) * scalar;
1844 y[i+3UL] =
sum( xmm4 ) * scalar;
// Groups of three rows.
1846 for( ; (i+3UL) <= M; i+=3UL ) {
1848 for(
size_t j=0UL; j<N; j+=IT::size ) {
1850 xmm1 = xmm1 + A.get(i ,j) * x1;
1851 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1852 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1854 y[i ] =
sum( xmm1 ) * scalar;
1855 y[i+1UL] =
sum( xmm2 ) * scalar;
1856 y[i+2UL] =
sum( xmm3 ) * scalar;
// Groups of two rows.
1858 for( ; (i+2UL) <= M; i+=2UL ) {
1860 for(
size_t j=0UL; j<N; j+=IT::size ) {
1862 xmm1 = xmm1 + A.get(i ,j) * x1;
1863 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1865 y[i ] =
sum( xmm1 ) * scalar;
1866 y[i+1UL] =
sum( xmm2 ) * scalar;
// Single remaining row: x is read directly instead of through x1.
1870 for(
size_t j=0UL; j<N; j+=IT::size ) {
1871 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1873 y[i] =
sum( xmm1 ) * scalar;
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2,ST2>: it performs no BLAS call and simply
// forwards y, A, x and scalar to selectDefaultAssignKernel.
1892 template<
typename VT1
1896 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1897 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1899 selectDefaultAssignKernel( y, A, x, scalar );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>.
// Calls cblas_sgemv (row-major, no transpose) with alpha = scalar and
// beta = 0, i.e. per the CBLAS gemv contract y is overwritten with
// scalar * A * x.
1918 template<
typename VT1
1922 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1923 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1925 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1931 const int M ( numeric_cast<int>( A.rows() ) );
1932 const int N ( numeric_cast<int>( A.columns() ) );
1933 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
1935 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1936 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>.
// Calls cblas_dgemv (row-major, no transpose) with alpha = scalar and
// beta = 0, i.e. per the CBLAS gemv contract y is overwritten with
// scalar * A * x.
1956 template<
typename VT1
1960 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1961 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1963 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
1969 const int M ( numeric_cast<int>( A.rows() ) );
1970 const int N ( numeric_cast<int>( A.columns() ) );
1971 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
1973 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1974 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_cgemv (row-major, no transpose) with alpha built from scalar
// and beta = (0,0), i.e. per the CBLAS gemv contract y is overwritten with
// scalar * A * x.
1994 template<
typename VT1
1998 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1999 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2001 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2010 const int M ( numeric_cast<int>( A.rows() ) );
2011 const int N ( numeric_cast<int>( A.columns() ) );
2012 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float>; both factors go to CBLAS by
// address.
2013 const complex<float> alpha( scalar );
2014 const complex<float> beta ( 0.0F, 0.0F );
2016 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2017 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of selectBlasAssignKernel selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_zgemv (row-major, no transpose) with alpha built from scalar
// and beta = (0,0), i.e. per the CBLAS gemv contract y is overwritten with
// scalar * A * x.
2037 template<
typename VT1
2041 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2042 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2044 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2053 const int M ( numeric_cast<int>( A.rows() ) );
2054 const int N ( numeric_cast<int>( A.columns() ) );
2055 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double>; both factors go to CBLAS by
// address.
2056 const complex<double> alpha( scalar );
2057 const complex<double> beta ( 0.0, 0.0 );
2059 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2060 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of assign for a SparseVector target.
// NOTE(review): the body is on lines not visible here.
2076 template<
typename VT1 >
2077 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of a scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled product expression supplying the operands and the scalar
// Takes the two operands of the wrapped product, special-cases a matrix with
// zero rows or zero columns, and otherwise dispatches to either
// selectDefaultAddAssignKernel or selectBlasAddAssignKernel with rhs.scalar_.
// NOTE(review): the body of the empty-matrix branch, the definitions of A and
// x, and the rest of the dispatch condition are on lines not visible here.
2104 template<
typename VT1 >
2105 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Operands of the wrapped matrix/vector product.
2111 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2112 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2114 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Kernel selection; only the first term of the condition is visible.
2126 if( ( IsComputation<MT>::value && !evaluate ) ||
2128 DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2130 DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized overload of selectDefaultAddAssignKernel, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
// Hands the expression A * x * scalar to y.addAssign().
2148 template<
typename VT1
2152 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2153 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2155 y.addAssign( A * x * scalar );
// Vectorized overload of selectDefaultAddAssignKernel, selected through
// EnableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
//   y       target vector, updated in place with +=
//   A       matrix operand, read through A.get(row,column)
//   x       vector operand, read through x.get(column)
//   scalar  factor applied to every reduced row sum
// Rows are processed in groups of 8, 4, 3 and 2, followed by a single-row
// loop. For each group one accumulator per row collects A.get(row,j) * x-chunk
// over j = 0, IT::size, 2*IT::size, ... < N, and sum( accumulator ) * scalar
// is added to the matching element of y.
// NOTE(review): the declarations of `i` and `x1` and the accumulator
// declarations of the smaller groups are on lines not visible here. `sum`
// presumably reduces the lanes of an accumulator to a scalar -- confirm.
// NOTE(review): no remainder handling for N not being a multiple of IT::size
// is visible; presumably rows are padded -- confirm.
2173 template<
typename VT1
2177 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2178 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2180 typedef IntrinsicTrait<ElementType> IT;
// M: number of rows of A, N: number of columns of A.
2182 const size_t M( A.rows() );
2183 const size_t N( A.columns() );
// Groups of eight rows.
2187 for( ; (i+8UL) <= M; i+=8UL ) {
2188 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2189 for(
size_t j=0UL; j<N; j+=IT::size ) {
2191 xmm1 = xmm1 + A.get(i ,j) * x1;
2192 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2193 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2194 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2195 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2196 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2197 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2198 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
2200 y[i ] +=
sum( xmm1 ) * scalar;
2201 y[i+1UL] +=
sum( xmm2 ) * scalar;
2202 y[i+2UL] +=
sum( xmm3 ) * scalar;
2203 y[i+3UL] +=
sum( xmm4 ) * scalar;
2204 y[i+4UL] +=
sum( xmm5 ) * scalar;
2205 y[i+5UL] +=
sum( xmm6 ) * scalar;
2206 y[i+6UL] +=
sum( xmm7 ) * scalar;
2207 y[i+7UL] +=
sum( xmm8 ) * scalar;
// Groups of four rows.
2209 for( ; (i+4UL) <= M; i+=4UL ) {
2211 for(
size_t j=0UL; j<N; j+=IT::size ) {
2213 xmm1 = xmm1 + A.get(i ,j) * x1;
2214 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2215 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2216 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2218 y[i ] +=
sum( xmm1 ) * scalar;
2219 y[i+1UL] +=
sum( xmm2 ) * scalar;
2220 y[i+2UL] +=
sum( xmm3 ) * scalar;
2221 y[i+3UL] +=
sum( xmm4 ) * scalar;
// Groups of three rows.
2223 for( ; (i+3UL) <= M; i+=3UL ) {
2225 for(
size_t j=0UL; j<N; j+=IT::size ) {
2227 xmm1 = xmm1 + A.get(i ,j) * x1;
2228 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2229 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2231 y[i ] +=
sum( xmm1 ) * scalar;
2232 y[i+1UL] +=
sum( xmm2 ) * scalar;
2233 y[i+2UL] +=
sum( xmm3 ) * scalar;
// Groups of two rows.
2235 for( ; (i+2UL) <= M; i+=2UL ) {
2237 for(
size_t j=0UL; j<N; j+=IT::size ) {
2239 xmm1 = xmm1 + A.get(i ,j) * x1;
2240 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2242 y[i ] +=
sum( xmm1 ) * scalar;
2243 y[i+1UL] +=
sum( xmm2 ) * scalar;
// Single remaining row: x is read directly instead of through x1.
2247 for(
size_t j=0UL; j<N; j+=IT::size ) {
2248 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2250 y[i] +=
sum( xmm1 ) * scalar;
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDefaultKernel<VT1,MT1,VT2,ST2>: it performs no BLAS call and simply
// forwards y, A, x and scalar to selectDefaultAddAssignKernel.
2269 template<
typename VT1
2273 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2274 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2276 selectDefaultAddAssignKernel( y, A, x, scalar );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseSinglePrecisionKernel<VT1,MT1,VT2,ST2>.
// Calls cblas_sgemv (row-major, no transpose) with alpha = scalar and
// beta = 1, i.e. per the CBLAS gemv contract scalar * A * x is added to y.
2295 template<
typename VT1
2299 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2300 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2302 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2308 const int M ( numeric_cast<int>( A.rows() ) );
2309 const int N ( numeric_cast<int>( A.columns() ) );
2310 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
2312 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2313 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDoublePrecisionKernel<VT1,MT1,VT2,ST2>.
// Calls cblas_dgemv (row-major, no transpose) with alpha = scalar and
// beta = 1, i.e. per the CBLAS gemv contract scalar * A * x is added to y.
2333 template<
typename VT1
2337 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2338 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2340 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2346 const int M ( numeric_cast<int>( A.rows() ) );
2347 const int N ( numeric_cast<int>( A.columns() ) );
2348 const int lda( numeric_cast<int>( A.spacing() ) );
// Both vectors are passed with stride 1.
2350 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2351 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseSinglePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_cgemv (row-major, no transpose) with alpha built from scalar
// and beta = (1,0), i.e. per the CBLAS gemv contract scalar * A * x is added
// to y.
2371 template<
typename VT1
2375 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2376 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2378 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2387 const int M ( numeric_cast<int>( A.rows() ) );
2388 const int N ( numeric_cast<int>( A.columns() ) );
2389 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float>; both factors go to CBLAS by
// address.
2390 const complex<float> alpha( scalar );
2391 const complex<float> beta ( 1.0F, 0.0F );
2393 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2394 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Overload of selectBlasAddAssignKernel selected through EnableIf on
// UseDoublePrecisionComplexKernel<VT1,MT1,VT2>.
// Calls cblas_zgemv (row-major, no transpose) with alpha built from scalar
// and beta = (1,0), i.e. per the CBLAS gemv contract scalar * A * x is added
// to y.
2414 template<
typename VT1
2418 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2419 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2421 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast performs a checked conversion of
// the row count, column count and A.spacing() (passed as leading dimension).
2430 const int M ( numeric_cast<int>( A.rows() ) );
2431 const int N ( numeric_cast<int>( A.columns() ) );
2432 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double>; both factors go to CBLAS by
// address.
2433 const complex<double> alpha( scalar );
2434 const complex<double> beta ( 1.0, 0.0 );
2436 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2437 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Subtraction assignment of a scaled matrix/vector product to a dense vector.
//   lhs  target dense vector
//   rhs  scaled product expression supplying the operands and the scalar
// Takes the two operands of the wrapped product, special-cases a matrix with
// zero rows or zero columns, and otherwise dispatches to either
// selectDefaultSubAssignKernel or selectBlasSubAssignKernel with rhs.scalar_.
// NOTE(review): the body of the empty-matrix branch, the definitions of A and
// x, and the rest of the dispatch condition are on lines not visible here.
2458 template<
typename VT1 >
2459 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Operands of the wrapped matrix/vector product.
2465 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2466 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2468 if( left.rows() == 0UL || left.columns() == 0UL ) {
// Kernel selection; only the first term of the condition is visible.
2480 if( ( IsComputation<MT>::value && !evaluate ) ||
2482 DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2484 DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized overload of selectDefaultSubAssignKernel, selected through
// DisableIf on UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2>.
// Hands the expression A * x * scalar to y.subAssign().
2502 template<
typename VT1
2506 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2507 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2509 y.subAssign( A * x * scalar );
// Vectorized default kernel for the subtraction assignment of a scaled
// matrix-vector product: for every row i, y[i] -= sum( A(i,:) * x ) * scalar.
// Selected via EnableIf when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> applies.
// The rows of A are processed in blocks of 8, 4, 3 and 2 rows, followed by a
// single-row step; each row has its own intrinsic accumulator (xmm1..xmm8) and
// the columns are traversed in steps of IT::size.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to each reduced row sum
// NOTE(review): x1 is presumably the packed chunk x.get(j), as used in the
// single-row loop at the end - confirm against its declaration.
2527 template<
typename VT1
2531 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2532 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2534 typedef IntrinsicTrait<ElementType> IT;
2536 const size_t M( A.rows() );
2537 const size_t N( A.columns() );
// Blocks of eight rows.
2541 for( ; (i+8UL) <= M; i+=8UL ) {
2542 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2543 for(
size_t j=0UL; j<N; j+=IT::size ) {
2545 xmm1 = xmm1 + A.get(i ,j) * x1;
2546 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2547 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2548 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2549 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2550 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2551 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2552 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Reduce each accumulator, scale it and subtract it from the matching element of y.
2554 y[i ] -=
sum( xmm1 ) * scalar;
2555 y[i+1UL] -=
sum( xmm2 ) * scalar;
2556 y[i+2UL] -=
sum( xmm3 ) * scalar;
2557 y[i+3UL] -=
sum( xmm4 ) * scalar;
2558 y[i+4UL] -=
sum( xmm5 ) * scalar;
2559 y[i+5UL] -=
sum( xmm6 ) * scalar;
2560 y[i+6UL] -=
sum( xmm7 ) * scalar;
2561 y[i+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of four rows.
2563 for( ; (i+4UL) <= M; i+=4UL ) {
2565 for(
size_t j=0UL; j<N; j+=IT::size ) {
2567 xmm1 = xmm1 + A.get(i ,j) * x1;
2568 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2569 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2570 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2572 y[i ] -=
sum( xmm1 ) * scalar;
2573 y[i+1UL] -=
sum( xmm2 ) * scalar;
2574 y[i+2UL] -=
sum( xmm3 ) * scalar;
2575 y[i+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of three rows.
2577 for( ; (i+3UL) <= M; i+=3UL ) {
2579 for(
size_t j=0UL; j<N; j+=IT::size ) {
2581 xmm1 = xmm1 + A.get(i ,j) * x1;
2582 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2583 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2585 y[i ] -=
sum( xmm1 ) * scalar;
2586 y[i+1UL] -=
sum( xmm2 ) * scalar;
2587 y[i+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of two rows.
2589 for( ; (i+2UL) <= M; i+=2UL ) {
2591 for(
size_t j=0UL; j<N; j+=IT::size ) {
2593 xmm1 = xmm1 + A.get(i ,j) * x1;
2594 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2596 y[i ] -=
sum( xmm1 ) * scalar;
2597 y[i+1UL] -=
sum( xmm2 ) * scalar;
// Single-row step for row i; x is read directly via x.get(j) here.
2601 for(
size_t j=0UL; j<N; j+=IT::size ) {
2602 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2604 y[i] -=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS subtraction assignment kernel.
// Enabled when UseDefaultKernel<VT1,MT1,VT2,ST2> applies; no BLAS routine is
// called in this case and the operation is forwarded unchanged to the default kernel.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to the product
2623 template<
typename VT1
2627 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2628 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2630 selectDefaultSubAssignKernel( y, A, x, scalar );
// BLAS kernel for the subtraction assignment of a scaled matrix-vector product
// in single precision: y -= scalar * A * x.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> applies.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to the product
2649 template<
typename VT1
2653 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2654 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2656 using boost::numeric_cast;
// CBLAS expects int arguments; numeric_cast performs a checked conversion.
2662 const int M ( numeric_cast<int>( A.rows() ) );
2663 const int N ( numeric_cast<int>( A.columns() ) );
2664 const int lda( numeric_cast<int>( A.spacing() ) );
// The subtraction is expressed as an accumulation with negated scaling factor:
// alpha = -scalar, beta = 1 (row-major, no transposition, unit strides).
2666 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2667 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS kernel for the subtraction assignment of a scaled matrix-vector product
// in double precision: y -= scalar * A * x.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> applies.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to the product
2687 template<
typename VT1
2691 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2692 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2694 using boost::numeric_cast;
// CBLAS expects int arguments; numeric_cast performs a checked conversion.
2700 const int M ( numeric_cast<int>( A.rows() ) );
2701 const int N ( numeric_cast<int>( A.columns() ) );
2702 const int lda( numeric_cast<int>( A.spacing() ) );
// The subtraction is expressed as an accumulation with negated scaling factor:
// alpha = -scalar, beta = 1 (row-major, no transposition, unit strides).
2704 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2705 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS kernel for the subtraction assignment of a scaled matrix-vector product
// with single precision complex elements: y -= scalar * A * x.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> applies.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to the product
2725 template<
typename VT1
2729 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2730 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2732 using boost::numeric_cast;
// CBLAS expects int arguments; numeric_cast performs a checked conversion.
2741 const int M ( numeric_cast<int>( A.rows() ) );
2742 const int N ( numeric_cast<int>( A.columns() ) );
2743 const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scaling factor as alpha and beta = 1 turn the accumulating gemv call
// into a subtraction from y.
2744 const complex<float> alpha( -scalar );
2745 const complex<float> beta ( 1.0F, 0.0F );
// Row-major storage, no transposition, unit stride for both x and y.
2747 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2748 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS kernel for the subtraction assignment of a scaled matrix-vector product
// with double precision complex elements: y -= scalar * A * x.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> applies.
//   y      - target vector, updated in place
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor applied to the product
2768 template<
typename VT1
2772 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2773 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2775 using boost::numeric_cast;
// CBLAS expects int arguments; numeric_cast performs a checked conversion.
2784 const int M ( numeric_cast<int>( A.rows() ) );
2785 const int N ( numeric_cast<int>( A.columns() ) );
2786 const int lda( numeric_cast<int>( A.spacing() ) );
// Negated scaling factor as alpha and beta = 1 turn the accumulating gemv call
// into a subtraction from y.
2787 const complex<double> alpha( -scalar );
2788 const complex<double> beta ( 1.0, 0.0 );
// Row-major storage, no transposition, unit stride for both x and y.
2790 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2791 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Multiplication assignment of a scaled matrix-vector multiplication expression
// (rhs) to the dense vector lhs.
//   lhs - target dense vector
//   rhs - scaled multiplication expression to be multiplied into lhs
2812 template<
typename VT1 >
2813 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Creates the matrix-vector multiplication expression DMatDVecMultExpr<T1,T2>.
// The overload is removed (DisableIf) when T1 is itself a matrix-matrix
// multiplication expression.
// Throws std::invalid_argument if the operand sizes do not match.
// NOTE(review): presumably the check compares the matrix column count with the
// vector size - confirm against the condition guarding the throw.
2885 template<
typename T1
2887 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2893 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
// Overload for the case that the matrix operand is itself a matrix-matrix
// multiplication expression (enabled via IsMatMatMultExpr<T1>).
// The expression (A * B) * v is restructured to A * (B * v): the right-hand
// side matrix operand is multiplied with the vector first, then the left-hand
// side matrix operand is applied to that result.
2921 template<
typename T1
2924 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2929 return (~mat).leftOperand() * ( (~mat).
rightOperand() * vec );