22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDVECMULTEXPR_H_
31 #include <boost/cast.hpp>
92 typedef typename MT::ResultType
MRT;
93 typedef typename VT::ResultType
VRT;
94 typedef typename MRT::ElementType
MET;
95 typedef typename VRT::ElementType
VET;
96 typedef typename MT::CompositeType
MCT;
97 typedef typename VT::CompositeType
VCT;
112 template<
typename T1,
typename T2,
typename T3 >
113 struct UseSinglePrecisionKernel {
// Compile-time selector for the double precision kernel.
// 'value' is non-zero only if IsDouble holds for the ElementType of all three
// argument types T1, T2 and T3.
127 template<
typename T1,
typename T2,
typename T3 >
128 struct UseDoublePrecisionKernel {
129 enum { value = IsDouble<typename T1::ElementType>::value &&
130 IsDouble<typename T2::ElementType>::value &&
131 IsDouble<typename T3::ElementType>::value };
// Compile-time selector for the single precision complex kernel.
// 'value' is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<float> (checked via IsSame).
142 template<
typename T1,
typename T2,
typename T3 >
143 struct UseSinglePrecisionComplexKernel {
144 typedef complex<float> Type;
145 enum { value = IsSame<typename T1::ElementType,Type>::value &&
146 IsSame<typename T2::ElementType,Type>::value &&
147 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the double precision complex kernel.
// 'value' is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<double> (checked via IsSame).
158 template<
typename T1,
typename T2,
typename T3 >
159 struct UseDoublePrecisionComplexKernel {
160 typedef complex<double> Type;
161 enum { value = IsSame<typename T1::ElementType,Type>::value &&
162 IsSame<typename T2::ElementType,Type>::value &&
163 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the default (non-BLAS) kernel.
// 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific selectors (single, double, single complex, double
// complex) is active for the given type triple.
173 template<
typename T1,
typename T2,
typename T3 >
174 struct UseDefaultKernel {
175 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
176 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
177 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
178 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time selector for the vectorized default kernel.
// 'value' is non-zero only if all three types report 'vectorizable', share one
// common ElementType, and IntrinsicTrait of that element type advertises both
// 'addition' and 'multiplication'.
189 template<
typename T1,
typename T2,
typename T3 >
190 struct UseVectorizedDefaultKernel {
191 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
192 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
193 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
194 IntrinsicTrait<typename T1::ElementType>::addition &&
195 IntrinsicTrait<typename T1::ElementType>::multiplication };
225 enum { vectorizable = 0 };
259 if(
mat_.columns() != 0UL ) {
261 for(
size_t j=1UL; j<
end_; j+=2UL ) {
264 if( end_ <
mat_.columns() ) {
312 template<
typename T >
339 template<
typename VT1 >
344 if( rhs.
mat_.rows() == 0UL ) {
347 else if( rhs.
mat_.columns() == 0UL ) {
362 DMatDVecMultExpr::selectDefaultAssignKernel( ~lhs, A, x );
364 DMatDVecMultExpr::selectBlasAssignKernel( ~lhs, A, x );
383 template<
typename VT1
387 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// Vectorized default kernel for the assignment of the product A * x to y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator whose horizontal
// sum( ) is stored into the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
408 template<
typename VT1
411 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
412 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
414 typedef IntrinsicTrait<ElementType> IT;
416 const size_t M( A.rows() );
417 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
421 for( ; (i+8UL) <= M; i+=8UL ) {
422 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
423 for(
size_t j=0UL; j<N; j+=IT::size ) {
425 xmm1 = xmm1 + A.get(i ,j) * x1;
426 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
427 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
428 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
429 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
430 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
431 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
432 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Reduce each accumulator to a scalar and overwrite the corresponding y element.
435 y[i+1UL] =
sum( xmm2 );
436 y[i+2UL] =
sum( xmm3 );
437 y[i+3UL] =
sum( xmm4 );
438 y[i+4UL] =
sum( xmm5 );
439 y[i+5UL] =
sum( xmm6 );
440 y[i+6UL] =
sum( xmm7 );
441 y[i+7UL] =
sum( xmm8 );
// Blocks of four rows.
443 for( ; (i+4UL) <= M; i+=4UL ) {
445 for(
size_t j=0UL; j<N; j+=IT::size ) {
447 xmm1 = xmm1 + A.get(i ,j) * x1;
448 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
449 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
450 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
453 y[i+1UL] =
sum( xmm2 );
454 y[i+2UL] =
sum( xmm3 );
455 y[i+3UL] =
sum( xmm4 );
// Blocks of three rows.
457 for( ; (i+3UL) <= M; i+=3UL ) {
459 for(
size_t j=0UL; j<N; j+=IT::size ) {
461 xmm1 = xmm1 + A.get(i ,j) * x1;
462 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
463 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
466 y[i+1UL] =
sum( xmm2 );
467 y[i+2UL] =
sum( xmm3 );
// Blocks of two rows.
469 for( ; (i+2UL) <= M; i+=2UL ) {
471 for(
size_t j=0UL; j<N; j+=IT::size ) {
473 xmm1 = xmm1 + A.get(i ,j) * x1;
474 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
477 y[i+1UL] =
sum( xmm2 );
// Single-row column loop: x is loaded directly instead of through 'x1'.
481 for(
size_t j=0UL; j<N; j+=IT::size ) {
482 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Fallback overload of the BLAS kernel selector for assignment.
// Enabled when UseDefaultKernel<VT1,MT1,VT2> holds; it performs no BLAS call
// and simply forwards y, A and x to selectDefaultAssignKernel.
504 template<
typename VT1
507 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
508 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
510 selectDefaultAssignKernel( y, A, x );
// Single precision BLAS kernel for assignment.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2> holds.
530 template<
typename VT1
533 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
534 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
536 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast
// (checked conversion) because the CBLAS interface takes int arguments.
542 const int M ( numeric_cast<int>( A.rows() ) );
543 const int N ( numeric_cast<int>( A.columns() ) );
544 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed gemv with alpha = 1 and beta = 0, unit strides for x and y.
546 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
547 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Double precision BLAS kernel for assignment.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2> holds.
568 template<
typename VT1
571 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
572 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
574 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast
// (checked conversion) because the CBLAS interface takes int arguments.
580 const int M ( numeric_cast<int>( A.rows() ) );
581 const int N ( numeric_cast<int>( A.columns() ) );
582 const int lda( numeric_cast<int>( A.spacing() ) );
// Row-major, non-transposed gemv with alpha = 1 and beta = 0, unit strides for x and y.
584 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
585 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Single precision complex BLAS kernel for assignment.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
606 template<
typename VT1
609 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
610 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
612 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
621 const int M ( numeric_cast<int>( A.rows() ) );
622 const int N ( numeric_cast<int>( A.columns() ) );
623 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv routine receives its scaling factors by address: alpha = (1,0), beta = (0,0).
624 const complex<float> alpha( 1.0F, 0.0F );
625 const complex<float> beta ( 0.0F, 0.0F );
627 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
628 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS kernel for assignment.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
649 template<
typename VT1
652 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
653 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
655 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
664 const int M ( numeric_cast<int>( A.rows() ) );
665 const int N ( numeric_cast<int>( A.columns() ) );
666 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv routine receives its scaling factors by address: alpha = (1,0), beta = (0,0).
667 const complex<double> alpha( 1.0, 0.0 );
668 const complex<double> beta ( 0.0, 0.0 );
670 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
671 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
689 template<
typename VT1 >
716 template<
typename VT1 >
721 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
733 if( ( IsComputation<MT>::value && !evaluate ) ||
735 DMatDVecMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x );
737 DMatDVecMultExpr::selectBlasAddAssignKernel( ~lhs, A, x );
// Non-vectorized default kernel for addition assignment.
// Selected (DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not
// hold; delegates the work to y.addAssign( ) on the product expression A * x.
756 template<
typename VT1
759 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
760 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
762 y.addAssign( A * x );
// Vectorized default kernel for the addition assignment of the product A * x to y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator whose horizontal
// sum( ) is added to the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
781 template<
typename VT1
784 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
785 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
787 typedef IntrinsicTrait<ElementType> IT;
789 const size_t M( A.rows() );
790 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
794 for( ; (i+8UL) <= M; i+=8UL ) {
795 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
796 for(
size_t j=0UL; j<N; j+=IT::size ) {
798 xmm1 = xmm1 + A.get(i ,j) * x1;
799 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
800 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
801 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
802 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
803 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
804 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
805 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Reduce each accumulator to a scalar and add it to the corresponding y element.
807 y[i ] +=
sum( xmm1 );
808 y[i+1UL] +=
sum( xmm2 );
809 y[i+2UL] +=
sum( xmm3 );
810 y[i+3UL] +=
sum( xmm4 );
811 y[i+4UL] +=
sum( xmm5 );
812 y[i+5UL] +=
sum( xmm6 );
813 y[i+6UL] +=
sum( xmm7 );
814 y[i+7UL] +=
sum( xmm8 );
// Blocks of four rows.
816 for( ; (i+4UL) <= M; i+=4UL ) {
818 for(
size_t j=0UL; j<N; j+=IT::size ) {
820 xmm1 = xmm1 + A.get(i ,j) * x1;
821 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
822 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
823 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
825 y[i ] +=
sum( xmm1 );
826 y[i+1UL] +=
sum( xmm2 );
827 y[i+2UL] +=
sum( xmm3 );
828 y[i+3UL] +=
sum( xmm4 );
// Blocks of three rows.
830 for( ; (i+3UL) <= M; i+=3UL ) {
832 for(
size_t j=0UL; j<N; j+=IT::size ) {
834 xmm1 = xmm1 + A.get(i ,j) * x1;
835 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
836 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
838 y[i ] +=
sum( xmm1 );
839 y[i+1UL] +=
sum( xmm2 );
840 y[i+2UL] +=
sum( xmm3 );
// Blocks of two rows.
842 for( ; (i+2UL) <= M; i+=2UL ) {
844 for(
size_t j=0UL; j<N; j+=IT::size ) {
846 xmm1 = xmm1 + A.get(i ,j) * x1;
847 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
849 y[i ] +=
sum( xmm1 );
850 y[i+1UL] +=
sum( xmm2 );
// Single-row column loop: x is loaded directly instead of through 'x1'.
854 for(
size_t j=0UL; j<N; j+=IT::size ) {
855 xmm1 = xmm1 + A.get(i,j) * x.get(j);
// Fallback overload of the BLAS kernel selector for addition assignment.
// Enabled when UseDefaultKernel<VT1,MT1,VT2> holds; it performs no BLAS call
// and simply forwards y, A and x to selectDefaultAddAssignKernel.
877 template<
typename VT1
880 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
881 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
883 selectDefaultAddAssignKernel( y, A, x );
// Single precision BLAS kernel for addition assignment.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2> holds.
903 template<
typename VT1
906 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
907 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
909 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
915 const int M ( numeric_cast<int>( A.rows() ) );
916 const int N ( numeric_cast<int>( A.columns() ) );
917 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1 and beta = 1: the product is accumulated onto the existing content of y.
919 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0F,
920 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS kernel for addition assignment.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2> holds.
941 template<
typename VT1
944 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
945 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
947 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
953 const int M ( numeric_cast<int>( A.rows() ) );
954 const int N ( numeric_cast<int>( A.columns() ) );
955 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = 1 and beta = 1: the product is accumulated onto the existing content of y.
957 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, 1.0,
958 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS kernel for addition assignment.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
979 template<
typename VT1
982 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
983 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
985 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
994 const int M ( numeric_cast<int>( A.rows() ) );
995 const int N ( numeric_cast<int>( A.columns() ) );
996 const int lda( numeric_cast<int>( A.spacing() ) );
// Scaling factors are passed by address: alpha = (1,0), beta = (1,0), so y is accumulated into.
997 const complex<float> alpha( 1.0F, 0.0F );
998 const complex<float> beta ( 1.0F, 0.0F );
1000 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1001 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS kernel for addition assignment.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
1022 template<
typename VT1
1025 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1026 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1028 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1037 const int M ( numeric_cast<int>( A.rows() ) );
1038 const int N ( numeric_cast<int>( A.columns() ) );
1039 const int lda( numeric_cast<int>( A.spacing() ) );
// Scaling factors are passed by address: alpha = (1,0), beta = (1,0), so y is accumulated into.
1040 const complex<double> alpha( 1.0, 0.0 );
1041 const complex<double> beta ( 1.0, 0.0 );
1043 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1044 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1066 template<
typename VT1 >
1071 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1083 if( ( IsComputation<MT>::value && !evaluate ) ||
1085 DMatDVecMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x );
1087 DMatDVecMultExpr::selectBlasSubAssignKernel( ~lhs, A, x );
// Non-vectorized default kernel for subtraction assignment.
// Selected (DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> does not
// hold; delegates the work to y.subAssign( ) on the product expression A * x.
1106 template<
typename VT1
1109 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1110 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1112 y.subAssign( A * x );
// Vectorized default kernel for the subtraction assignment of the product A * x from y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator whose horizontal
// sum( ) is subtracted from the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
1131 template<
typename VT1
1134 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1135 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1137 typedef IntrinsicTrait<ElementType> IT;
1139 const size_t M( A.rows() );
1140 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
1144 for( ; (i+8UL) <= M; i+=8UL ) {
1145 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1146 for(
size_t j=0UL; j<N; j+=IT::size ) {
1148 xmm1 = xmm1 + A.get(i ,j) * x1;
1149 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1150 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1151 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1152 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1153 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1154 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1155 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// Reduce each accumulator to a scalar and subtract it from the corresponding y element.
1157 y[i ] -=
sum( xmm1 );
1158 y[i+1UL] -=
sum( xmm2 );
1159 y[i+2UL] -=
sum( xmm3 );
1160 y[i+3UL] -=
sum( xmm4 );
1161 y[i+4UL] -=
sum( xmm5 );
1162 y[i+5UL] -=
sum( xmm6 );
1163 y[i+6UL] -=
sum( xmm7 );
1164 y[i+7UL] -=
sum( xmm8 );
// Blocks of four rows.
1166 for( ; (i+4UL) <= M; i+=4UL ) {
1168 for(
size_t j=0UL; j<N; j+=IT::size ) {
1170 xmm1 = xmm1 + A.get(i ,j) * x1;
1171 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1172 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1173 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1175 y[i ] -=
sum( xmm1 );
1176 y[i+1UL] -=
sum( xmm2 );
1177 y[i+2UL] -=
sum( xmm3 );
1178 y[i+3UL] -=
sum( xmm4 );
// Blocks of three rows.
1180 for( ; (i+3UL) <= M; i+=3UL ) {
1182 for(
size_t j=0UL; j<N; j+=IT::size ) {
1184 xmm1 = xmm1 + A.get(i ,j) * x1;
1185 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1186 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1188 y[i ] -=
sum( xmm1 );
1189 y[i+1UL] -=
sum( xmm2 );
1190 y[i+2UL] -=
sum( xmm3 );
// Blocks of two rows.
1192 for( ; (i+2UL) <= M; i+=2UL ) {
1194 for(
size_t j=0UL; j<N; j+=IT::size ) {
1196 xmm1 = xmm1 + A.get(i ,j) * x1;
1197 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1199 y[i ] -=
sum( xmm1 );
1200 y[i+1UL] -=
sum( xmm2 );
// Single-row column loop: x is loaded directly instead of through 'x1'.
1204 for(
size_t j=0UL; j<N; j+=IT::size ) {
1205 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1207 y[i] -=
sum( xmm1 );
// Fallback overload of the BLAS kernel selector for subtraction assignment.
// Enabled when UseDefaultKernel<VT1,MT1,VT2> holds; it performs no BLAS call
// and simply forwards y, A and x to selectDefaultSubAssignKernel.
1227 template<
typename VT1
1230 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1231 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1233 selectDefaultSubAssignKernel( y, A, x );
// Single precision BLAS kernel for subtraction assignment.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2> holds.
1253 template<
typename VT1
1256 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1257 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1259 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1265 const int M ( numeric_cast<int>( A.rows() ) );
1266 const int N ( numeric_cast<int>( A.columns() ) );
1267 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = -1 and beta = 1: the product is subtracted from the existing content of y.
1269 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0F,
1270 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS kernel for subtraction assignment.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2> holds.
1291 template<
typename VT1
1294 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1295 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1297 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1303 const int M ( numeric_cast<int>( A.rows() ) );
1304 const int N ( numeric_cast<int>( A.columns() ) );
1305 const int lda( numeric_cast<int>( A.spacing() ) );
// alpha = -1 and beta = 1: the product is subtracted from the existing content of y.
1307 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -1.0,
1308 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS kernel for subtraction assignment.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
1329 template<
typename VT1
1332 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1333 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1335 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1344 const int M ( numeric_cast<int>( A.rows() ) );
1345 const int N ( numeric_cast<int>( A.columns() ) );
1346 const int lda( numeric_cast<int>( A.spacing() ) );
// Scaling factors are passed by address: alpha = (-1,0), beta = (1,0), so the product is subtracted from y.
1347 const complex<float> alpha( -1.0F, 0.0F );
1348 const complex<float> beta ( 1.0F, 0.0F );
1350 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1351 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS kernel for subtraction assignment.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
1372 template<
typename VT1
1375 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1376 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1378 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1387 const int M ( numeric_cast<int>( A.rows() ) );
1388 const int N ( numeric_cast<int>( A.columns() ) );
1389 const int lda( numeric_cast<int>( A.spacing() ) );
// Scaling factors are passed by address: alpha = (-1,0), beta = (1,0), so the product is subtracted from y.
1390 const complex<double> alpha( -1.0, 0.0 );
1391 const complex<double> beta ( 1.0, 0.0 );
1393 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1394 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1416 template<
typename VT1 >
1463 template<
typename MT
1467 :
public DenseVector< DVecScalarMultExpr< DMatDVecMultExpr<MT,VT>, ST, false >, false >
1468 ,
private Expression
1469 ,
private Computation
1473 typedef DMatDVecMultExpr<MT,VT> MVM;
1474 typedef typename MVM::ResultType RES;
1475 typedef typename MT::ResultType
MRT;
1476 typedef typename VT::ResultType
VRT;
1477 typedef typename MRT::ElementType
MET;
1478 typedef typename VRT::ElementType
VET;
1479 typedef typename MT::CompositeType
MCT;
1480 typedef typename VT::CompositeType
VCT;
1485 enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1486 IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
// Compile-time selector for the single precision kernel of the scaled product.
// 'value' is non-zero only if IsFloat holds for the ElementType of T1, T2 and
// T3 and the fourth type T4 is not complex (IsComplex<T4> is false).
1494 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1495 struct UseSinglePrecisionKernel {
1496 enum { value = IsFloat<typename T1::ElementType>::value &&
1497 IsFloat<typename T2::ElementType>::value &&
1498 IsFloat<typename T3::ElementType>::value &&
1499 !IsComplex<T4>::value };
// Compile-time selector for the double precision kernel of the scaled product.
// 'value' is non-zero only if IsDouble holds for the ElementType of T1, T2 and
// T3 and the fourth type T4 is not complex (IsComplex<T4> is false).
1508 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1509 struct UseDoublePrecisionKernel {
1510 enum { value = IsDouble<typename T1::ElementType>::value &&
1511 IsDouble<typename T2::ElementType>::value &&
1512 IsDouble<typename T3::ElementType>::value &&
1513 !IsComplex<T4>::value };
// Compile-time selector for the single precision complex kernel of the scaled product.
// 'value' is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<float>; the scalar type does not take part in this check.
1522 template<
typename T1,
typename T2,
typename T3 >
1523 struct UseSinglePrecisionComplexKernel {
1524 typedef complex<float> Type;
1525 enum { value = IsSame<typename T1::ElementType,Type>::value &&
1526 IsSame<typename T2::ElementType,Type>::value &&
1527 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the double precision complex kernel of the scaled product.
// 'value' is non-zero only if the ElementType of T1, T2 and T3 is exactly
// complex<double>; the scalar type does not take part in this check.
1536 template<
typename T1,
typename T2,
typename T3 >
1537 struct UseDoublePrecisionComplexKernel {
1538 typedef complex<double> Type;
1539 enum { value = IsSame<typename T1::ElementType,Type>::value &&
1540 IsSame<typename T2::ElementType,Type>::value &&
1541 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the default (non-BLAS) kernel of the scaled product.
// 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific selectors is active. The real-valued selectors also
// receive the scalar type T4, the complex ones only T1, T2 and T3.
1549 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1550 struct UseDefaultKernel {
1551 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1552 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1553 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1554 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time selector for the vectorized default kernel of the scaled product.
// 'value' is non-zero only if T1, T2 and T3 report 'vectorizable', share one
// common ElementType that is also identical to the scalar type T4, and
// IntrinsicTrait of that element type advertises both 'addition' and
// 'multiplication'.
1563 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1564 struct UseVectorizedDefaultKernel {
1565 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1566 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1567 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1568 IsSame<typename T1::ElementType,T4>::value &&
1569 IntrinsicTrait<typename T1::ElementType>::addition &&
1570 IntrinsicTrait<typename T1::ElementType>::multiplication };
1576 typedef DVecScalarMultExpr<MVM,ST,false>
This;
1577 typedef typename MultTrait<RES,ST>::Type
ResultType;
1579 typedef typename ResultType::ElementType
ElementType;
1580 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
1585 typedef const DMatDVecMultExpr<MT,VT>
LeftOperand;
1591 typedef typename SelectType< evaluate, const MRT, MCT >::Type
LT;
1594 typedef typename SelectType< IsComputation<VT>::value,
const VRT,
VCT >::Type
RT;
1599 enum { vectorizable = 0 };
1602 enum { canAlias = CanAlias<MVM>::value };
1611 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
1625 return vector_[index] * scalar_;
1634 inline size_t size()
const {
1635 return vector_.size();
1665 template<
typename T >
1666 inline bool isAliased(
const T* alias )
const {
1667 return CanAlias<MVM>::value && vector_.isAliased( alias );
1689 template<
typename VT1 >
1690 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
1694 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
1695 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
1697 if( left.rows() == 0UL ) {
1700 else if( left.columns() == 0UL ) {
1713 if( ( IsComputation<MT>::value && !evaluate ) ||
1715 DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, x, rhs.scalar_ );
1717 DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for the assignment of a scaled product.
// Selected (DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does
// not hold; delegates to y.assign( ) on the expression A * x * scalar.
1735 template<
typename VT1
1739 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1740 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1742 y.assign( A * x * scalar );
// Vectorized default kernel for the assignment of the scaled product A * x * scalar to y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator; its horizontal
// sum( ) is multiplied by 'scalar' and stored into the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
1760 template<
typename VT1
1764 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1765 selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1767 typedef IntrinsicTrait<ElementType> IT;
1769 const size_t M( A.rows() );
1770 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
1774 for( ; (i+8UL) <= M; i+=8UL ) {
1775 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1776 for(
size_t j=0UL; j<N; j+=IT::size ) {
1778 xmm1 = xmm1 + A.get(i ,j) * x1;
1779 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1780 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1781 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1782 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
1783 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
1784 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
1785 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// The scaling is applied once per row, after the horizontal reduction.
1787 y[i ] =
sum( xmm1 ) * scalar;
1788 y[i+1UL] =
sum( xmm2 ) * scalar;
1789 y[i+2UL] =
sum( xmm3 ) * scalar;
1790 y[i+3UL] =
sum( xmm4 ) * scalar;
1791 y[i+4UL] =
sum( xmm5 ) * scalar;
1792 y[i+5UL] =
sum( xmm6 ) * scalar;
1793 y[i+6UL] =
sum( xmm7 ) * scalar;
1794 y[i+7UL] =
sum( xmm8 ) * scalar;
// Blocks of four rows.
1796 for( ; (i+4UL) <= M; i+=4UL ) {
1798 for(
size_t j=0UL; j<N; j+=IT::size ) {
1800 xmm1 = xmm1 + A.get(i ,j) * x1;
1801 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1802 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1803 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
1805 y[i ] =
sum( xmm1 ) * scalar;
1806 y[i+1UL] =
sum( xmm2 ) * scalar;
1807 y[i+2UL] =
sum( xmm3 ) * scalar;
1808 y[i+3UL] =
sum( xmm4 ) * scalar;
// Blocks of three rows.
1810 for( ; (i+3UL) <= M; i+=3UL ) {
1812 for(
size_t j=0UL; j<N; j+=IT::size ) {
1814 xmm1 = xmm1 + A.get(i ,j) * x1;
1815 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1816 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
1818 y[i ] =
sum( xmm1 ) * scalar;
1819 y[i+1UL] =
sum( xmm2 ) * scalar;
1820 y[i+2UL] =
sum( xmm3 ) * scalar;
// Blocks of two rows.
1822 for( ; (i+2UL) <= M; i+=2UL ) {
1824 for(
size_t j=0UL; j<N; j+=IT::size ) {
1826 xmm1 = xmm1 + A.get(i ,j) * x1;
1827 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
1829 y[i ] =
sum( xmm1 ) * scalar;
1830 y[i+1UL] =
sum( xmm2 ) * scalar;
// Single-row column loop: x is loaded directly instead of through 'x1'.
1834 for(
size_t j=0UL; j<N; j+=IT::size ) {
1835 xmm1 = xmm1 + A.get(i,j) * x.get(j);
1837 y[i] =
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS kernel selector for the assignment of a scaled product.
// Enabled when UseDefaultKernel<VT1,MT1,VT2,ST2> holds; it performs no BLAS
// call and simply forwards y, A, x and scalar to selectDefaultAssignKernel.
1856 template<
typename VT1
1860 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
1861 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1863 selectDefaultAssignKernel( y, A, x, scalar );
// Single precision BLAS kernel for the assignment of a scaled product.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
1882 template<
typename VT1
1886 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1887 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1889 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1895 const int M ( numeric_cast<int>( A.rows() ) );
1896 const int N ( numeric_cast<int>( A.columns() ) );
1897 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is folded into the gemv call as alpha; beta = 0 overwrites y.
1899 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1900 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Double precision BLAS kernel for the assignment of a scaled product.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
1920 template<
typename VT1
1924 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
1925 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1927 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1933 const int M ( numeric_cast<int>( A.rows() ) );
1934 const int N ( numeric_cast<int>( A.columns() ) );
1935 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is folded into the gemv call as alpha; beta = 0 overwrites y.
1937 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
1938 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Single precision complex BLAS kernel for the assignment of a scaled product.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
1958 template<
typename VT1
1962 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1963 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
1965 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
1975 const int M ( numeric_cast<int>( A.rows() ) );
1976 const int N ( numeric_cast<int>( A.columns() ) );
1977 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float> and passed by address as alpha; beta = (0,0) overwrites y.
1978 const complex<float> alpha( scalar );
1979 const complex<float> beta ( 0.0F, 0.0F );
1981 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
1982 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS kernel for the assignment of a scaled product.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
2002 template<
typename VT1
2006 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2007 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2009 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
2019 const int M ( numeric_cast<int>( A.rows() ) );
2020 const int N ( numeric_cast<int>( A.columns() ) );
2021 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double> and passed by address as alpha; beta = (0,0) overwrites y.
2022 const complex<double> alpha( scalar );
2023 const complex<double> beta ( 0.0, 0.0 );
2025 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2026 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2042 template<
typename VT1 >
2068 template<
typename VT1 >
2069 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2073 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2074 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2076 if( left.rows() == 0UL || left.columns() == 0UL ) {
2088 if( ( IsComputation<MT>::value && !evaluate ) ||
2090 DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
2092 DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for the addition assignment of a scaled product.
// Selected (DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does
// not hold; delegates to y.addAssign( ) on the expression A * x * scalar.
2110 template<
typename VT1
2114 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2115 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2117 y.addAssign( A * x * scalar );
// Vectorized default kernel for the addition assignment of the scaled product A * x * scalar to y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator; its horizontal
// sum( ) is multiplied by 'scalar' and added to the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
2135 template<
typename VT1
2139 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2140 selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2142 typedef IntrinsicTrait<ElementType> IT;
2144 const size_t M( A.rows() );
2145 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
2149 for( ; (i+8UL) <= M; i+=8UL ) {
2150 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2151 for(
size_t j=0UL; j<N; j+=IT::size ) {
2153 xmm1 = xmm1 + A.get(i ,j) * x1;
2154 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2155 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2156 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2157 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2158 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2159 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2160 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// The scaling is applied once per row, after the horizontal reduction.
2162 y[i ] +=
sum( xmm1 ) * scalar;
2163 y[i+1UL] +=
sum( xmm2 ) * scalar;
2164 y[i+2UL] +=
sum( xmm3 ) * scalar;
2165 y[i+3UL] +=
sum( xmm4 ) * scalar;
2166 y[i+4UL] +=
sum( xmm5 ) * scalar;
2167 y[i+5UL] +=
sum( xmm6 ) * scalar;
2168 y[i+6UL] +=
sum( xmm7 ) * scalar;
2169 y[i+7UL] +=
sum( xmm8 ) * scalar;
// Blocks of four rows.
2171 for( ; (i+4UL) <= M; i+=4UL ) {
2173 for(
size_t j=0UL; j<N; j+=IT::size ) {
2175 xmm1 = xmm1 + A.get(i ,j) * x1;
2176 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2177 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2178 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2180 y[i ] +=
sum( xmm1 ) * scalar;
2181 y[i+1UL] +=
sum( xmm2 ) * scalar;
2182 y[i+2UL] +=
sum( xmm3 ) * scalar;
2183 y[i+3UL] +=
sum( xmm4 ) * scalar;
// Blocks of three rows.
2185 for( ; (i+3UL) <= M; i+=3UL ) {
2187 for(
size_t j=0UL; j<N; j+=IT::size ) {
2189 xmm1 = xmm1 + A.get(i ,j) * x1;
2190 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2191 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2193 y[i ] +=
sum( xmm1 ) * scalar;
2194 y[i+1UL] +=
sum( xmm2 ) * scalar;
2195 y[i+2UL] +=
sum( xmm3 ) * scalar;
// Blocks of two rows.
2197 for( ; (i+2UL) <= M; i+=2UL ) {
2199 for(
size_t j=0UL; j<N; j+=IT::size ) {
2201 xmm1 = xmm1 + A.get(i ,j) * x1;
2202 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2204 y[i ] +=
sum( xmm1 ) * scalar;
2205 y[i+1UL] +=
sum( xmm2 ) * scalar;
// Single-row column loop: x is loaded directly instead of through 'x1'.
2209 for(
size_t j=0UL; j<N; j+=IT::size ) {
2210 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2212 y[i] +=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS kernel selector for the addition assignment of a scaled product.
// Enabled when UseDefaultKernel<VT1,MT1,VT2,ST2> holds; it performs no BLAS
// call and simply forwards y, A, x and scalar to selectDefaultAddAssignKernel.
2231 template<
typename VT1
2235 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2236 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2238 selectDefaultAddAssignKernel( y, A, x, scalar );
// Single precision BLAS kernel for the addition assignment of a scaled product.
// Enabled when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
2257 template<
typename VT1
2261 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2262 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2264 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
2270 const int M ( numeric_cast<int>( A.rows() ) );
2271 const int N ( numeric_cast<int>( A.columns() ) );
2272 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is folded into the gemv call as alpha; beta = 1 accumulates onto y.
2274 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2275 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// Double precision BLAS kernel for the addition assignment of a scaled product.
// Enabled when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
2295 template<
typename VT1
2299 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2300 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2302 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
2308 const int M ( numeric_cast<int>( A.rows() ) );
2309 const int N ( numeric_cast<int>( A.columns() ) );
2310 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is folded into the gemv call as alpha; beta = 1 accumulates onto y.
2312 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, scalar,
2313 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// Single precision complex BLAS kernel for the addition assignment of a scaled product.
// Enabled when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
2333 template<
typename VT1
2337 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2338 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2340 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
2350 const int M ( numeric_cast<int>( A.rows() ) );
2351 const int N ( numeric_cast<int>( A.columns() ) );
2352 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<float> and passed by address as alpha; beta = (1,0) accumulates onto y.
2353 const complex<float> alpha( scalar );
2354 const complex<float> beta ( 1.0F, 0.0F );
2356 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2357 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Double precision complex BLAS kernel for the addition assignment of a scaled product.
// Enabled when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
2377 template<
typename VT1
2381 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2382 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2384 using boost::numeric_cast;
// Dimensions and leading dimension are converted to int via numeric_cast (checked conversion).
2394 const int M ( numeric_cast<int>( A.rows() ) );
2395 const int N ( numeric_cast<int>( A.columns() ) );
2396 const int lda( numeric_cast<int>( A.spacing() ) );
// The scalar is converted to complex<double> and passed by address as alpha; beta = (1,0) accumulates onto y.
2397 const complex<double> alpha( scalar );
2398 const complex<double> beta ( 1.0, 0.0 );
2400 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2401 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2422 template<
typename VT1 >
2423 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2427 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2428 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2430 if( left.rows() == 0UL || left.columns() == 0UL ) {
2442 if( ( IsComputation<MT>::value && !evaluate ) ||
2444 DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
2446 DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Non-vectorized default kernel for the subtraction assignment of a scaled product.
// Selected (DisableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does
// not hold; delegates to y.subAssign( ) on the expression A * x * scalar.
2464 template<
typename VT1
2468 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2469 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2471 y.subAssign( A * x * scalar );
// Vectorized default kernel for the subtraction assignment of the scaled product A * x * scalar from y.
// Overload is enabled (EnableIf) when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// Rows of A are handled in blocks of 8, 4, 3 and 2; a final column loop covers
// a single row. Each row gets its own intrinsic accumulator; its horizontal
// sum( ) is multiplied by 'scalar' and subtracted from the matching element of y.
// NOTE(review): the declarations of 'i' and 'x1' and the accumulator
// initialisation are not visible in this excerpt; 'x1' is presumably the packed
// load x.get(j) -- confirm against the full source.
2489 template<
typename VT1
2493 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2494 selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2496 typedef IntrinsicTrait<ElementType> IT;
2498 const size_t M( A.rows() );
2499 const size_t N( A.columns() );
// Blocks of eight rows; the column index advances by one intrinsic width (IT::size).
2503 for( ; (i+8UL) <= M; i+=8UL ) {
2504 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2505 for(
size_t j=0UL; j<N; j+=IT::size ) {
2507 xmm1 = xmm1 + A.get(i ,j) * x1;
2508 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2509 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2510 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2511 xmm5 = xmm5 + A.get(i+4UL,j) * x1;
2512 xmm6 = xmm6 + A.get(i+5UL,j) * x1;
2513 xmm7 = xmm7 + A.get(i+6UL,j) * x1;
2514 xmm8 = xmm8 + A.get(i+7UL,j) * x1;
// The scaling is applied once per row, after the horizontal reduction.
2516 y[i ] -=
sum( xmm1 ) * scalar;
2517 y[i+1UL] -=
sum( xmm2 ) * scalar;
2518 y[i+2UL] -=
sum( xmm3 ) * scalar;
2519 y[i+3UL] -=
sum( xmm4 ) * scalar;
2520 y[i+4UL] -=
sum( xmm5 ) * scalar;
2521 y[i+5UL] -=
sum( xmm6 ) * scalar;
2522 y[i+6UL] -=
sum( xmm7 ) * scalar;
2523 y[i+7UL] -=
sum( xmm8 ) * scalar;
// Blocks of four rows.
2525 for( ; (i+4UL) <= M; i+=4UL ) {
2527 for(
size_t j=0UL; j<N; j+=IT::size ) {
2529 xmm1 = xmm1 + A.get(i ,j) * x1;
2530 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2531 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2532 xmm4 = xmm4 + A.get(i+3UL,j) * x1;
2534 y[i ] -=
sum( xmm1 ) * scalar;
2535 y[i+1UL] -=
sum( xmm2 ) * scalar;
2536 y[i+2UL] -=
sum( xmm3 ) * scalar;
2537 y[i+3UL] -=
sum( xmm4 ) * scalar;
// Blocks of three rows.
2539 for( ; (i+3UL) <= M; i+=3UL ) {
2541 for(
size_t j=0UL; j<N; j+=IT::size ) {
2543 xmm1 = xmm1 + A.get(i ,j) * x1;
2544 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2545 xmm3 = xmm3 + A.get(i+2UL,j) * x1;
2547 y[i ] -=
sum( xmm1 ) * scalar;
2548 y[i+1UL] -=
sum( xmm2 ) * scalar;
2549 y[i+2UL] -=
sum( xmm3 ) * scalar;
// Blocks of two rows.
2551 for( ; (i+2UL) <= M; i+=2UL ) {
2553 for(
size_t j=0UL; j<N; j+=IT::size ) {
2555 xmm1 = xmm1 + A.get(i ,j) * x1;
2556 xmm2 = xmm2 + A.get(i+1UL,j) * x1;
2558 y[i ] -=
sum( xmm1 ) * scalar;
2559 y[i+1UL] -=
sum( xmm2 ) * scalar;
// Single-row column loop: x is loaded directly instead of through 'x1'.
2563 for(
size_t j=0UL; j<N; j+=IT::size ) {
2564 xmm1 = xmm1 + A.get(i,j) * x.get(j);
2566 y[i] -=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS subtraction assignment kernel.
// Selected via EnableIf when UseDefaultKernel<VT1,MT1,VT2,ST2> holds; no BLAS
// routine is called and the arguments are forwarded unchanged to
// selectDefaultSubAssignKernel.
//   y      - left-hand side dense vector that is updated in place
//   A      - left-hand side dense matrix operand
//   x      - right-hand side dense vector operand
//   scalar - scaling factor applied to the product
2585 template<
typename VT1
2589 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2590 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2592 selectDefaultSubAssignKernel( y, A, x, scalar );
// BLAS-based subtraction assignment kernel for single precision operands.
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Forwards to cblas_sgemv with alpha = -scalar and beta = 1; by the BLAS gemv
// contract this evaluates y := y - scalar*A*x.
//   y      - left-hand side dense vector that is updated in place
//   A      - left-hand side dense matrix operand
//   x      - right-hand side dense vector operand
//   scalar - scaling factor applied to the product
2611 template<
typename VT1
2615 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2616 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2618 using boost::numeric_cast;
// CBLAS expects int dimensions; numeric_cast converts with a range check.
2624 const int M ( numeric_cast<int>( A.rows() ) );
2625 const int N ( numeric_cast<int>( A.columns() ) );
2626 const int lda( numeric_cast<int>( A.spacing() ) );
// The negated scalar turns the gemv update into a subtraction; beta = 1 keeps y.
2628 cblas_sgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2629 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision operands.
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> holds.
// Forwards to cblas_dgemv with alpha = -scalar and beta = 1; by the BLAS gemv
// contract this evaluates y := y - scalar*A*x.
//   y      - left-hand side dense vector that is updated in place
//   A      - left-hand side dense matrix operand
//   x      - right-hand side dense vector operand
//   scalar - scaling factor applied to the product
2649 template<
typename VT1
2653 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
2654 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2656 using boost::numeric_cast;
// CBLAS expects int dimensions; numeric_cast converts with a range check.
2662 const int M ( numeric_cast<int>( A.rows() ) );
2663 const int N ( numeric_cast<int>( A.columns() ) );
2664 const int lda( numeric_cast<int>( A.spacing() ) );
// The negated scalar turns the gemv update into a subtraction; beta = 1 keeps y.
2666 cblas_dgemv( CblasRowMajor, CblasNoTrans, M, N, -scalar,
2667 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS-based subtraction assignment kernel for single precision complex operands.
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<VT1,MT1,VT2> holds.
// Forwards to cblas_cgemv with alpha = -scalar and beta = (1,0); by the BLAS
// gemv contract this evaluates y := y - scalar*A*x.
//   y      - left-hand side dense vector that is updated in place
//   A      - left-hand side dense matrix operand
//   x      - right-hand side dense vector operand
//   scalar - scaling factor applied to the product
2687 template<
typename VT1
2691 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2692 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2694 using boost::numeric_cast;
// CBLAS expects int dimensions; numeric_cast converts with a range check.
2704 const int M ( numeric_cast<int>( A.rows() ) );
2705 const int N ( numeric_cast<int>( A.columns() ) );
2706 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex gemv takes alpha/beta by address: alpha is the negated scaling
// factor (subtraction), beta = 1 preserves the current content of y.
2707 const complex<float> alpha( -scalar );
2708 const complex<float> beta ( 1.0F, 0.0F );
2710 cblas_cgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2711 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS-based subtraction assignment kernel for double precision complex operands.
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<VT1,MT1,VT2> holds.
// Forwards to cblas_zgemv with alpha = -scalar and beta = (1,0); by the BLAS
// gemv contract this evaluates y := y - scalar*A*x.
//   y      - left-hand side dense vector that is updated in place
//   A      - left-hand side dense matrix operand
//   x      - right-hand side dense vector operand
//   scalar - scaling factor applied to the product
2731 template<
typename VT1
2735 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2736 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2738 using boost::numeric_cast;
// CBLAS expects int dimensions; numeric_cast converts with a range check.
2748 const int M ( numeric_cast<int>( A.rows() ) );
2749 const int N ( numeric_cast<int>( A.columns() ) );
2750 const int lda( numeric_cast<int>( A.spacing() ) );
// Complex gemv takes alpha/beta by address: alpha is the negated scaling
// factor (subtraction), beta = 1 preserves the current content of y.
2751 const complex<double> alpha( -scalar );
2752 const complex<double> beta ( 1.0, 0.0 );
2754 cblas_zgemv( CblasRowMajor, CblasNoTrans, M, N, &alpha,
2755 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Multiplication assignment of a scaled dense matrix-dense vector product to a
// dense vector.
//   lhs - target dense vector
//   rhs - scaled matrix-vector product expression
// NOTE(review): only the signature is visible at this point; the evaluation
// strategy has to be confirmed against the function body.
2776 template<
typename VT1 >
2777 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
// Overload yielding a DMatDVecMultExpr<T1,T2>; it is removed from overload
// resolution (DisableIf) when T1 is itself a matrix-matrix multiplication
// expression. Raises std::invalid_argument with the message below.
// NOTE(review): presumably the multiplication operator for a dense matrix and a
// dense vector, throwing when the operand sizes disagree - the name and the
// size check are not shown here, confirm.
2846 template<
typename T1
2848 inline const typename DisableIf< IsMatMatMultExpr<T1>, DMatDVecMultExpr<T1,T2> >::Type
2852 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
// Overload enabled (EnableIf) only when T1 is a matrix-matrix multiplication
// expression; the result type is taken from MultExprTrait<T1,T2>.
// Restructures (A*B)*v into A*(B*v): the right operand of the matrix product is
// multiplied with the vector first, then the left operand is applied to that
// result.
// NOTE(review): presumably the multiplication operator overload - the function
// name and parameter list are not shown here, confirm.
2880 template<
typename T1
2883 inline const typename EnableIf< IsMatMatMultExpr<T1>, MultExprTrait<T1,T2> >::Type::Type
2886 return (~mat).leftOperand() * ( (~mat).
rightOperand() * vec );