22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECTDMATMULTEXPR_H_
31 #include <boost/cast.hpp>
// Shorthand aliases for the nested types of the two operands:
//   VRT / MRT : result types of the vector (VT) and matrix (MT) operand
//   VET / MET : element types of those result types
//   VCT / MCT : composite types of the vector and matrix operand
91 typedef typename VT::ResultType
VRT;
92 typedef typename MT::ResultType
MRT;
93 typedef typename VRT::ElementType
VET;
94 typedef typename MRT::ElementType
MET;
95 typedef typename VT::CompositeType
VCT;
96 typedef typename MT::CompositeType
MCT;
// Compile-time selector over three operand types. Its body is not part of
// this excerpt. It guards the kernels that call cblas_sgemv, so it presumably
// tests for float element types (NOTE(review): confirm against the full file).
111 template<
typename T1,
typename T2,
typename T3 >
112 struct UseSinglePrecisionKernel {
// Compile-time selector for the double precision kernel: 'value' is non-zero
// only if IsDouble holds for the ElementType of all three types T1, T2 and T3.
126 template<
typename T1,
typename T2,
typename T3 >
127 struct UseDoublePrecisionKernel {
128 enum { value = IsDouble<typename T1::ElementType>::value &&
129 IsDouble<typename T2::ElementType>::value &&
130 IsDouble<typename T3::ElementType>::value };
// Compile-time selector for the single precision complex kernel: 'value' is
// non-zero only if the ElementType of T1, T2 and T3 is exactly complex<float>.
141 template<
typename T1,
typename T2,
typename T3 >
142 struct UseSinglePrecisionComplexKernel {
143 typedef complex<float> Type;
144 enum { value = IsSame<typename T1::ElementType,Type>::value &&
145 IsSame<typename T2::ElementType,Type>::value &&
146 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the double precision complex kernel: 'value' is
// non-zero only if the ElementType of T1, T2 and T3 is exactly complex<double>.
157 template<
typename T1,
typename T2,
typename T3 >
158 struct UseDoublePrecisionComplexKernel {
159 typedef complex<double> Type;
160 enum { value = IsSame<typename T1::ElementType,Type>::value &&
161 IsSame<typename T2::ElementType,Type>::value &&
162 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the non-BLAS fallback: 'value' is non-zero if
// BLAZE_BLAS_MODE evaluates to false, or if none of the four precision-specific
// selectors (single, double, complex single, complex double) matches T1/T2/T3.
172 template<
typename T1,
typename T2,
typename T3 >
173 struct UseDefaultKernel {
174 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
175 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
176 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
177 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time selector for the vectorized default kernel. 'value' is non-zero
// only if all three types report 'vectorizable', all three share the same
// ElementType, and IntrinsicTrait of that element type reports both 'addition'
// and 'multiplication'.
188 template<
typename T1,
typename T2,
typename T3 >
189 struct UseVectorizedDefaultKernel {
190 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
193 IntrinsicTrait<typename T1::ElementType>::addition &&
194 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compile-time flag: the expression itself is marked as not vectorizable.
224 enum { vectorizable = 0 };
258 if(
mat_.rows() != 0UL ) {
260 for(
size_t j=1UL; j<
end_; j+=2UL ) {
263 if( end_ < mat_.rows() ) {
281 return mat_.columns();
311 template<
typename T >
// Fragment of an assignment routine (its signature line is not part of this
// excerpt; presumably the 'assign' overload - NOTE(review): confirm).
// Visible logic: a matrix with zero rows and a matrix with zero columns are
// handled by two separate early branches; otherwise the work is forwarded to
// either the default or the BLAS assignment kernel with the operands x and A.
339 template<
typename VT1 >
344 if( rhs.
mat_.rows() == 0UL ) {
348 else if( rhs.
mat_.columns() == 0UL ) {
362 TDVecTDMatMultExpr::selectDefaultAssignKernel( ~lhs, x, A );
364 TDVecTDMatMultExpr::selectBlasAssignKernel( ~lhs, x, A );
// Head of a default assignment kernel taking the target vector y, the vector
// operand x and the matrix operand A. Its return-type line and body are not
// part of this excerpt. NOTE(review): presumably the non-vectorized
// counterpart of the EnableIf<UseVectorizedDefaultKernel> overload - confirm.
383 template<
typename VT1
387 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// Vectorized default assignment kernel, enabled only when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
//
// y : target vector, indexed by column j of A
// x : vector operand, read via x.get(i)
// A : matrix operand with M rows and N columns, read via A.get(i,j)
//
// Columns are handled in groups of 8, 4, 3 and 2. Each group keeps one
// IntrinsicType accumulator per column, walks the rows in steps of IT::size,
// and finally reduces every accumulator with sum() into the matching y entry.
// (The declarations of j and x1 and the store to y[j] lie on lines that are
// not part of this excerpt.)
408 template<
typename VT1
411 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
412 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
414 typedef IntrinsicTrait<ElementType> IT;
416 const size_t M( A.rows() );
417 const size_t N( A.columns() );
// Groups of eight columns.
421 for( ; (j+8UL) <= N; j+=8UL ) {
422 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
423 for(
size_t i=0UL; i<M; i+=IT::size ) {
425 xmm1 = xmm1 + x1 * A.get(i,j );
426 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
427 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
428 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
429 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
430 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
431 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
432 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
435 y[j+1UL] =
sum( xmm2 );
436 y[j+2UL] =
sum( xmm3 );
437 y[j+3UL] =
sum( xmm4 );
438 y[j+4UL] =
sum( xmm5 );
439 y[j+5UL] =
sum( xmm6 );
440 y[j+6UL] =
sum( xmm7 );
441 y[j+7UL] =
sum( xmm8 );
// Groups of four columns.
443 for( ; (j+4UL) <= N; j+=4UL ) {
445 for(
size_t i=0UL; i<M; i+=IT::size ) {
447 xmm1 = xmm1 + x1 * A.get(i,j );
448 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
449 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
450 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
453 y[j+1UL] =
sum( xmm2 );
454 y[j+2UL] =
sum( xmm3 );
455 y[j+3UL] =
sum( xmm4 );
// Groups of three columns.
457 for( ; (j+3UL) <= N; j+=3UL ) {
459 for(
size_t i=0UL; i<M; i+=IT::size ) {
461 xmm1 = xmm1 + x1 * A.get(i,j );
462 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
463 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
466 y[j+1UL] =
sum( xmm2 );
467 y[j+2UL] =
sum( xmm3 );
// Groups of two columns.
469 for( ; (j+2UL) <= N; j+=2UL ) {
471 for(
size_t i=0UL; i<M; i+=IT::size ) {
473 xmm1 = xmm1 + x1 * A.get(i,j );
474 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
477 y[j+1UL] =
sum( xmm2 );
// Single column j: accumulates A(i,j) * x(i) over the rows.
481 for(
size_t i=0UL; i<M; i+=IT::size ) {
482 xmm1 = xmm1 + A.get(i,j) * x.get(i);
// BLAS assignment kernel overload chosen when UseDefaultKernel<VT1,VT2,MT1>
// holds: no BLAS routine is used, the call is forwarded unchanged to
// selectDefaultAssignKernel.
504 template<
typename VT1
507 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
508 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
510 selectDefaultAssignKernel( y, x, A );
// BLAS assignment kernel for the single precision case
// (enabled via UseSinglePrecisionKernel<VT1,VT2,MT1>).
// The matrix dimensions and A.spacing() (passed as lda) are converted to int
// with boost::numeric_cast. cblas_sgemv is called with column-major layout and
// the transpose flag, alpha = 1.0F and beta = 0.0F, reading x and writing y,
// both with stride 1.
530 template<
typename VT1
533 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
534 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
536 using boost::numeric_cast;
542 const int M ( numeric_cast<int>( A.rows() ) );
543 const int N ( numeric_cast<int>( A.columns() ) );
544 const int lda( numeric_cast<int>( A.spacing() ) );
546 cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
547 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// BLAS assignment kernel for the double precision case
// (enabled via UseDoublePrecisionKernel<VT1,VT2,MT1>).
// Same call pattern as the single precision overload, but using cblas_dgemv
// with alpha = 1.0 and beta = 0.0.
568 template<
typename VT1
571 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
572 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
574 using boost::numeric_cast;
580 const int M ( numeric_cast<int>( A.rows() ) );
581 const int N ( numeric_cast<int>( A.columns() ) );
582 const int lda( numeric_cast<int>( A.spacing() ) );
584 cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
585 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// BLAS assignment kernel for complex<float> operands
// (enabled via UseSinglePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_cgemv takes its scaling factors by address, hence the local complex
// constants alpha = (1,0) and beta = (0,0).
606 template<
typename VT1
609 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
610 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
612 using boost::numeric_cast;
621 const int M ( numeric_cast<int>( A.rows() ) );
622 const int N ( numeric_cast<int>( A.columns() ) );
623 const int lda( numeric_cast<int>( A.spacing() ) );
624 const complex<float> alpha( 1.0F, 0.0F );
625 const complex<float> beta ( 0.0F, 0.0F );
627 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
628 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS assignment kernel for complex<double> operands
// (enabled via UseDoublePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_zgemv takes its scaling factors by address, hence the local complex
// constants alpha = (1,0) and beta = (0,0).
649 template<
typename VT1
652 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
653 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
655 using boost::numeric_cast;
664 const int M ( numeric_cast<int>( A.rows() ) );
665 const int N ( numeric_cast<int>( A.columns() ) );
666 const int lda( numeric_cast<int>( A.spacing() ) );
667 const complex<double> alpha( 1.0, 0.0 );
668 const complex<double> beta ( 0.0, 0.0 );
670 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
671 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
690 template<
typename VT1 >
// Fragment of an addition-assignment routine (its signature line is not part
// of this excerpt; presumably 'addAssign' - NOTE(review): confirm).
// Visible logic: an empty matrix (zero rows or zero columns) takes an early
// branch; otherwise the work goes to the default add-assign kernel when the
// visible part of the condition holds (MT is a computation and 'evaluate' is
// not set; the rest of the condition is not part of this excerpt) and to the
// BLAS add-assign kernel otherwise.
718 template<
typename VT1 >
723 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
735 if( ( IsComputation<MT>::value && !evaluate ) ||
737 TDVecTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A );
739 TDVecTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, x, A );
// Default addition-assignment kernel used when the vectorized default kernel
// is NOT applicable (DisableIf on UseVectorizedDefaultKernel): the product
// expression x * A is handed to y.addAssign().
758 template<
typename VT1
761 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
762 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
764 y.addAssign( x * A );
// Vectorized default addition-assignment kernel, enabled only when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
//
// y : target vector; each computed column sum is ADDED to y[j]
// x : vector operand, read via x.get(i)
// A : matrix operand with M rows and N columns, read via A.get(i,j)
//
// Columns are handled in groups of 8, 4, 3 and 2. Each group keeps one
// IntrinsicType accumulator per column, walks the rows in steps of IT::size,
// and reduces every accumulator with sum() before adding it to y.
// (The declarations of j and x1 lie on lines not part of this excerpt.)
783 template<
typename VT1
786 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
787 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
789 typedef IntrinsicTrait<ElementType> IT;
791 const size_t M( A.rows() );
792 const size_t N( A.columns() );
// Groups of eight columns.
796 for( ; (j+8UL) <= N; j+=8UL ) {
797 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
798 for(
size_t i=0UL; i<M; i+=IT::size ) {
800 xmm1 = xmm1 + x1 * A.get(i,j );
801 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
802 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
803 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
804 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
805 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
806 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
807 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
809 y[j ] +=
sum( xmm1 );
810 y[j+1UL] +=
sum( xmm2 );
811 y[j+2UL] +=
sum( xmm3 );
812 y[j+3UL] +=
sum( xmm4 );
813 y[j+4UL] +=
sum( xmm5 );
814 y[j+5UL] +=
sum( xmm6 );
815 y[j+6UL] +=
sum( xmm7 );
816 y[j+7UL] +=
sum( xmm8 );
// Groups of four columns.
818 for( ; (j+4UL) <= N; j+=4UL ) {
820 for(
size_t i=0UL; i<M; i+=IT::size ) {
822 xmm1 = xmm1 + x1 * A.get(i,j );
823 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
824 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
825 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
827 y[j ] +=
sum( xmm1 );
828 y[j+1UL] +=
sum( xmm2 );
829 y[j+2UL] +=
sum( xmm3 );
830 y[j+3UL] +=
sum( xmm4 );
// Groups of three columns.
832 for( ; (j+3UL) <= N; j+=3UL ) {
834 for(
size_t i=0UL; i<M; i+=IT::size ) {
836 xmm1 = xmm1 + x1 * A.get(i,j );
837 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
838 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
840 y[j ] +=
sum( xmm1 );
841 y[j+1UL] +=
sum( xmm2 );
842 y[j+2UL] +=
sum( xmm3 );
// Groups of two columns.
844 for( ; (j+2UL) <= N; j+=2UL ) {
846 for(
size_t i=0UL; i<M; i+=IT::size ) {
848 xmm1 = xmm1 + x1 * A.get(i,j );
849 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
851 y[j ] +=
sum( xmm1 );
852 y[j+1UL] +=
sum( xmm2 );
// Single column j: accumulates A(i,j) * x(i) over the rows.
856 for(
size_t i=0UL; i<M; i+=IT::size ) {
857 xmm1 = xmm1 + A.get(i,j) * x.get(i);
// BLAS addition-assignment kernel overload chosen when
// UseDefaultKernel<VT1,VT2,MT1> holds: no BLAS routine is used, the call is
// forwarded unchanged to selectDefaultAddAssignKernel.
879 template<
typename VT1
882 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
883 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
885 selectDefaultAddAssignKernel( y, x, A );
// BLAS addition-assignment kernel for the single precision case
// (enabled via UseSinglePrecisionKernel<VT1,VT2,MT1>).
// cblas_sgemv is called with alpha = 1.0F and beta = 1.0F, i.e. the existing
// contents of y are kept and the product is accumulated onto them.
905 template<
typename VT1
908 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
909 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
911 using boost::numeric_cast;
917 const int M ( numeric_cast<int>( A.rows() ) );
918 const int N ( numeric_cast<int>( A.columns() ) );
919 const int lda( numeric_cast<int>( A.spacing() ) );
921 cblas_sgemv( CblasColMajor, CblasTrans, M, N, 1.0F,
922 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS addition-assignment kernel for the double precision case
// (enabled via UseDoublePrecisionKernel<VT1,VT2,MT1>).
// cblas_dgemv is called with alpha = 1.0 and beta = 1.0, i.e. the existing
// contents of y are kept and the product is accumulated onto them.
943 template<
typename VT1
946 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
947 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
949 using boost::numeric_cast;
955 const int M ( numeric_cast<int>( A.rows() ) );
956 const int N ( numeric_cast<int>( A.columns() ) );
957 const int lda( numeric_cast<int>( A.spacing() ) );
959 cblas_dgemv( CblasColMajor, CblasTrans, M, N, 1.0,
960 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS addition-assignment kernel for complex<float> operands
// (enabled via UseSinglePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_cgemv receives alpha = (1,0) and beta = (1,0) by address, so the
// product is accumulated onto the existing contents of y.
981 template<
typename VT1
984 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
985 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
987 using boost::numeric_cast;
996 const int M ( numeric_cast<int>( A.rows() ) );
997 const int N ( numeric_cast<int>( A.columns() ) );
998 const int lda( numeric_cast<int>( A.spacing() ) );
999 const complex<float> alpha( 1.0F, 0.0F );
1000 const complex<float> beta ( 1.0F, 0.0F );
1002 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1003 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS addition-assignment kernel for complex<double> operands
// (enabled via UseDoublePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_zgemv receives alpha = (1,0) and beta = (1,0) by address, so the
// product is accumulated onto the existing contents of y.
1024 template<
typename VT1
1027 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1028 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1030 using boost::numeric_cast;
1039 const int M ( numeric_cast<int>( A.rows() ) );
1040 const int N ( numeric_cast<int>( A.columns() ) );
1041 const int lda( numeric_cast<int>( A.spacing() ) );
1042 const complex<double> alpha( 1.0, 0.0 );
1043 const complex<double> beta ( 1.0, 0.0 );
1045 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1046 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Fragment of a subtraction-assignment routine (its signature line is not
// part of this excerpt; presumably 'subAssign' - NOTE(review): confirm).
// Visible logic: an empty matrix (zero rows or zero columns) takes an early
// branch; otherwise the work goes to the default sub-assign kernel when the
// visible part of the condition holds (MT is a computation and 'evaluate' is
// not set; the rest of the condition is not part of this excerpt) and to the
// BLAS sub-assign kernel otherwise.
1069 template<
typename VT1 >
1074 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1086 if( ( IsComputation<MT>::value && !evaluate ) ||
1088 TDVecTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A );
1090 TDVecTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, x, A );
// Default subtraction-assignment kernel used when the vectorized default
// kernel is NOT applicable (DisableIf on UseVectorizedDefaultKernel): the
// product expression x * A is handed to y.subAssign().
1109 template<
typename VT1
1112 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1113 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1115 y.subAssign( x * A );
// Vectorized default subtraction-assignment kernel, enabled only when
// UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
//
// y : target vector; each computed column sum is SUBTRACTED from y[j]
// x : vector operand, read via x.get(i)
// A : matrix operand with M rows and N columns, read via A.get(i,j)
//
// Columns are handled in groups of 8, 4, 3 and 2, followed by a single-column
// step. Each group keeps one IntrinsicType accumulator per column, walks the
// rows in steps of IT::size, and reduces every accumulator with sum() before
// subtracting it from y.
// (The declarations of j and x1 lie on lines not part of this excerpt.)
1134 template<
typename VT1
1137 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1138 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1140 typedef IntrinsicTrait<ElementType> IT;
1142 const size_t M( A.rows() );
1143 const size_t N( A.columns() );
// Groups of eight columns.
1147 for( ; (j+8UL) <= N; j+=8UL ) {
1148 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1149 for(
size_t i=0UL; i<M; i+=IT::size ) {
1151 xmm1 = xmm1 + x1 * A.get(i,j );
1152 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1153 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1154 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1155 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1156 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1157 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1158 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1160 y[j ] -=
sum( xmm1 );
1161 y[j+1UL] -=
sum( xmm2 );
1162 y[j+2UL] -=
sum( xmm3 );
1163 y[j+3UL] -=
sum( xmm4 );
1164 y[j+4UL] -=
sum( xmm5 );
1165 y[j+5UL] -=
sum( xmm6 );
1166 y[j+6UL] -=
sum( xmm7 );
1167 y[j+7UL] -=
sum( xmm8 );
// Groups of four columns.
1169 for( ; (j+4UL) <= N; j+=4UL ) {
1171 for(
size_t i=0UL; i<M; i+=IT::size ) {
1173 xmm1 = xmm1 + x1 * A.get(i,j );
1174 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1175 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1176 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1178 y[j ] -=
sum( xmm1 );
1179 y[j+1UL] -=
sum( xmm2 );
1180 y[j+2UL] -=
sum( xmm3 );
1181 y[j+3UL] -=
sum( xmm4 );
// Groups of three columns.
1183 for( ; (j+3UL) <= N; j+=3UL ) {
1185 for(
size_t i=0UL; i<M; i+=IT::size ) {
1187 xmm1 = xmm1 + x1 * A.get(i,j );
1188 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1189 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1191 y[j ] -=
sum( xmm1 );
1192 y[j+1UL] -=
sum( xmm2 );
1193 y[j+2UL] -=
sum( xmm3 );
// Groups of two columns.
1195 for( ; (j+2UL) <= N; j+=2UL ) {
1197 for(
size_t i=0UL; i<M; i+=IT::size ) {
1199 xmm1 = xmm1 + x1 * A.get(i,j );
1200 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1202 y[j ] -=
sum( xmm1 );
1203 y[j+1UL] -=
sum( xmm2 );
// Single column j: accumulates A(i,j) * x(i) over the rows.
1207 for(
size_t i=0UL; i<M; i+=IT::size ) {
1208 xmm1 = xmm1 + A.get(i,j) * x.get(i);
1210 y[j] -=
sum( xmm1 );
// BLAS subtraction-assignment kernel overload chosen when
// UseDefaultKernel<VT1,VT2,MT1> holds: no BLAS routine is used, the call is
// forwarded unchanged to selectDefaultSubAssignKernel.
1230 template<
typename VT1
1233 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1234 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1236 selectDefaultSubAssignKernel( y, x, A );
// BLAS subtraction-assignment kernel for the single precision case
// (enabled via UseSinglePrecisionKernel<VT1,VT2,MT1>).
// cblas_sgemv is called with alpha = -1.0F and beta = 1.0F, i.e. the product
// is subtracted from the existing contents of y.
1256 template<
typename VT1
1259 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1260 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1262 using boost::numeric_cast;
1268 const int M ( numeric_cast<int>( A.rows() ) );
1269 const int N ( numeric_cast<int>( A.columns() ) );
1270 const int lda( numeric_cast<int>( A.spacing() ) );
1272 cblas_sgemv( CblasColMajor, CblasTrans, M, N, -1.0F,
1273 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS subtraction-assignment kernel for the double precision case
// (enabled via UseDoublePrecisionKernel<VT1,VT2,MT1>).
// cblas_dgemv is called with alpha = -1.0 and beta = 1.0, i.e. the product
// is subtracted from the existing contents of y.
1294 template<
typename VT1
1297 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1298 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1300 using boost::numeric_cast;
1306 const int M ( numeric_cast<int>( A.rows() ) );
1307 const int N ( numeric_cast<int>( A.columns() ) );
1308 const int lda( numeric_cast<int>( A.spacing() ) );
1310 cblas_dgemv( CblasColMajor, CblasTrans, M, N, -1.0,
1311 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS subtraction-assignment kernel for complex<float> operands
// (enabled via UseSinglePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_cgemv receives alpha = (-1,0) and beta = (1,0) by address, so the
// product is subtracted from the existing contents of y.
1332 template<
typename VT1
1335 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1336 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1338 using boost::numeric_cast;
1347 const int M ( numeric_cast<int>( A.rows() ) );
1348 const int N ( numeric_cast<int>( A.columns() ) );
1349 const int lda( numeric_cast<int>( A.spacing() ) );
1350 const complex<float> alpha( -1.0F, 0.0F );
1351 const complex<float> beta ( 1.0F, 0.0F );
1353 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1354 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS subtraction-assignment kernel for complex<double> operands
// (enabled via UseDoublePrecisionComplexKernel<VT1,VT2,MT1>).
// cblas_zgemv receives alpha = (-1,0) and beta = (1,0) by address, so the
// product is subtracted from the existing contents of y.
1375 template<
typename VT1
1378 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1379 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1381 using boost::numeric_cast;
1390 const int M ( numeric_cast<int>( A.rows() ) );
1391 const int N ( numeric_cast<int>( A.columns() ) );
1392 const int lda( numeric_cast<int>( A.spacing() ) );
1393 const complex<double> alpha( -1.0, 0.0 );
1394 const complex<double> beta ( 1.0, 0.0 );
1396 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1397 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1420 template<
typename VT1 >
// Class head for the scaled vector/matrix product expression (the line naming
// the class is not part of this excerpt). The visible base-class list derives
// publicly from DenseVector, parameterized with
// DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, and privately
// from Expression and Computation.
1467 template<
typename VT
1471 :
public DenseVector< DVecScalarMultExpr< TDVecTDMatMultExpr<VT,MT>, ST, true >, true >
1472 ,
private Expression
1473 ,
private Computation
// Shorthand aliases:
//   RES       : result type of the wrapped expression type VMM
//   VRT / MRT : result types of the vector (VT) and matrix (MT) operand
//   VET / MET : element types of those result types
//   VCT / MCT : composite types of the vector and matrix operand
1478 typedef typename VMM::ResultType RES;
1479 typedef typename VT::ResultType
VRT;
1480 typedef typename MT::ResultType
MRT;
1481 typedef typename VRT::ElementType
VET;
1482 typedef typename MRT::ElementType
MET;
1483 typedef typename VT::CompositeType
VCT;
1484 typedef typename MT::CompositeType
MCT;
// Compile-time flag: non-zero only if MT is a computation expression, MT is
// not vectorizable, the vector and matrix element types are identical, and
// that element type is BLAS compatible. It selects the type RT (const MRT
// instead of MCT) and takes part in the kernel dispatch conditions.
1489 enum { evaluate = IsComputation<MT>::value && !MT::vectorizable &&
1490 IsSame<VET,MET>::value && IsBlasCompatible<VET>::value };
// Compile-time selector for the single precision kernel of the scaled
// expression: 'value' is non-zero only if IsFloat holds for the ElementType of
// T1, T2 and T3 and the scalar type T4 is not complex.
1498 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1499 struct UseSinglePrecisionKernel {
1500 enum { value = IsFloat<typename T1::ElementType>::value &&
1501 IsFloat<typename T2::ElementType>::value &&
1502 IsFloat<typename T3::ElementType>::value &&
1503 !IsComplex<T4>::value };
// Compile-time selector for the double precision kernel of the scaled
// expression: 'value' is non-zero only if IsDouble holds for the ElementType
// of T1, T2 and T3 and the scalar type T4 is not complex.
1512 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1513 struct UseDoublePrecisionKernel {
1514 enum { value = IsDouble<typename T1::ElementType>::value &&
1515 IsDouble<typename T2::ElementType>::value &&
1516 IsDouble<typename T3::ElementType>::value &&
1517 !IsComplex<T4>::value };
// Compile-time selector for the single precision complex kernel: 'value' is
// non-zero only if the ElementType of T1, T2 and T3 is exactly complex<float>.
// The scalar type does not take part in this check.
1526 template<
typename T1,
typename T2,
typename T3 >
1527 struct UseSinglePrecisionComplexKernel {
1528 typedef complex<float> Type;
1529 enum { value = IsSame<typename T1::ElementType,Type>::value &&
1530 IsSame<typename T2::ElementType,Type>::value &&
1531 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the double precision complex kernel: 'value' is
// non-zero only if the ElementType of T1, T2 and T3 is exactly complex<double>.
// The scalar type does not take part in this check.
1540 template<
typename T1,
typename T2,
typename T3 >
1541 struct UseDoublePrecisionComplexKernel {
1542 typedef complex<double> Type;
1543 enum { value = IsSame<typename T1::ElementType,Type>::value &&
1544 IsSame<typename T2::ElementType,Type>::value &&
1545 IsSame<typename T3::ElementType,Type>::value };
// Compile-time selector for the non-BLAS fallback of the scaled expression:
// 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if none of the
// four precision-specific selectors matches. Only the two real-valued
// selectors take the scalar type T4 into account.
1553 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1554 struct UseDefaultKernel {
1555 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1556 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1557 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1558 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time selector for the vectorized default kernel of the scaled
// expression. 'value' is non-zero only if T1, T2 and T3 report 'vectorizable',
// all three share the same ElementType, the scalar type T4 is that same type,
// and IntrinsicTrait of the element type reports both 'addition' and
// 'multiplication'.
1567 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1568 struct UseVectorizedDefaultKernel {
1569 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1570 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1571 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1572 IsSame<typename T1::ElementType,T4>::value &&
1573 IntrinsicTrait<typename T1::ElementType>::addition &&
1574 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type aliases of the scaled expression:
//   This          : the expression type itself
//   ResultType    : MultTrait of the wrapped result type RES and the scalar ST
//   ElementType   : element type of ResultType
//   IntrinsicType : intrinsic type belonging to ElementType
//   LT            : const VRT if VT is a computation, otherwise VCT
//   RT            : const MRT if 'evaluate' is set, otherwise MCT
1580 typedef DVecScalarMultExpr<VMM,ST,true>
This;
1581 typedef typename MultTrait<RES,ST>::Type
ResultType;
1583 typedef typename ResultType::ElementType
ElementType;
1584 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
1595 typedef typename SelectType< IsComputation<VT>::value,
const VRT,
VCT >::Type
LT;
1598 typedef typename SelectType< evaluate, const MRT, MCT >::Type
RT;
// Compile-time flags: the expression itself is marked as not vectorizable;
// its aliasing capability is taken over from the wrapped expression type VMM.
1603 enum { vectorizable = 0 };
1606 enum { canAlias = CanAlias<VMM>::value };
// Constructor head: takes the wrapped vector/matrix product expression and the
// scalar factor. Declared explicit; the initializer list and body are not part
// of this excerpt.
1615 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
1629 return vector_[index] * scalar_;
// Returns the size of the expression, which is the size of the wrapped
// vector expression.
1638 inline size_t size()
const {
1639 return vector_.size();
// Reports whether the expression is aliased with the object at the given
// address: true only if the wrapped expression type can alias at all
// (CanAlias<VMM>) and the wrapped expression reports the alias itself.
1669 template<
typename T >
1670 inline bool isAliased(
const T* alias )
const {
1671 return CanAlias<VMM>::value && vector_.isAliased( alias );
// Assignment of the scaled vector/matrix product to a dense vector.
// The two operands are extracted from the wrapped expression. A right operand
// with zero rows and one with zero columns are handled by two separate early
// branches. Otherwise the work goes to the default assign kernel when the
// visible part of the condition holds (MT is a computation and 'evaluate' is
// not set; the rest of the condition is not part of this excerpt) and to the
// BLAS assign kernel otherwise. Both kernels receive rhs.scalar_ as factor.
1693 template<
typename VT1
1695 friend inline void assign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
1699 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
1700 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
1702 if( right.rows() == 0UL ) {
1706 else if( right.columns() == 0UL ) {
1718 if( ( IsComputation<MT>::value && !evaluate ) ||
1720 DVecScalarMultExpr::selectDefaultAssignKernel( ~lhs, x, A, rhs.scalar_ );
1722 DVecScalarMultExpr::selectBlasAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Default assignment kernel of the scaled expression, used when the vectorized
// default kernel is NOT applicable (DisableIf on UseVectorizedDefaultKernel):
// the expression x * A * scalar is handed to y.assign().
1740 template<
typename VT1
1744 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1745 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1747 y.assign( x * A * scalar );
// Vectorized default assignment kernel of the scaled expression, enabled only
// when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector; y[j] is overwritten with the scaled column sum
// x      : vector operand, read via x.get(i)
// A      : matrix operand with M rows and N columns, read via A.get(i,j)
// scalar : factor applied to each reduced column sum
//
// Columns are handled in groups of 8, 4, 3 and 2, followed by a single-column
// step. Each group keeps one IntrinsicType accumulator per column, walks the
// rows in steps of IT::size, and reduces every accumulator with sum(); the
// scaling happens once per column after the reduction.
// (The declarations of j and x1 lie on lines not part of this excerpt.)
1765 template<
typename VT1
1769 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1770 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1772 typedef IntrinsicTrait<ElementType> IT;
1774 const size_t M( A.rows() );
1775 const size_t N( A.columns() );
// Groups of eight columns.
1779 for( ; (j+8UL) <= N; j+=8UL ) {
1780 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1781 for(
size_t i=0UL; i<M; i+=IT::size ) {
1783 xmm1 = xmm1 + x1 * A.get(i,j );
1784 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1785 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1786 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1787 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
1788 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
1789 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
1790 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
1792 y[j ] =
sum( xmm1 ) * scalar;
1793 y[j+1UL] =
sum( xmm2 ) * scalar;
1794 y[j+2UL] =
sum( xmm3 ) * scalar;
1795 y[j+3UL] =
sum( xmm4 ) * scalar;
1796 y[j+4UL] =
sum( xmm5 ) * scalar;
1797 y[j+5UL] =
sum( xmm6 ) * scalar;
1798 y[j+6UL] =
sum( xmm7 ) * scalar;
1799 y[j+7UL] =
sum( xmm8 ) * scalar;
// Groups of four columns.
1801 for( ; (j+4UL) <= N; j+=4UL ) {
1803 for(
size_t i=0UL; i<M; i+=IT::size ) {
1805 xmm1 = xmm1 + x1 * A.get(i,j );
1806 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1807 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1808 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
1810 y[j ] =
sum( xmm1 ) * scalar;
1811 y[j+1UL] =
sum( xmm2 ) * scalar;
1812 y[j+2UL] =
sum( xmm3 ) * scalar;
1813 y[j+3UL] =
sum( xmm4 ) * scalar;
// Groups of three columns.
1815 for( ; (j+3UL) <= N; j+=3UL ) {
1817 for(
size_t i=0UL; i<M; i+=IT::size ) {
1819 xmm1 = xmm1 + x1 * A.get(i,j );
1820 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1821 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
1823 y[j ] =
sum( xmm1 ) * scalar;
1824 y[j+1UL] =
sum( xmm2 ) * scalar;
1825 y[j+2UL] =
sum( xmm3 ) * scalar;
// Groups of two columns.
1827 for( ; (j+2UL) <= N; j+=2UL ) {
1829 for(
size_t i=0UL; i<M; i+=IT::size ) {
1831 xmm1 = xmm1 + x1 * A.get(i,j );
1832 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
1834 y[j ] =
sum( xmm1 ) * scalar;
1835 y[j+1UL] =
sum( xmm2 ) * scalar;
// Single column j: accumulates A(i,j) * x(i) over the rows.
1839 for(
size_t i=0UL; i<M; i+=IT::size ) {
1840 xmm1 = xmm1 + A.get(i,j) * x.get(i);
1842 y[j] =
sum( xmm1 ) * scalar;
// BLAS assignment kernel overload of the scaled expression chosen when
// UseDefaultKernel<VT1,VT2,MT1,ST2> holds: no BLAS routine is used, the call
// is forwarded unchanged to selectDefaultAssignKernel.
1860 template<
typename VT1
1864 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
1865 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1867 selectDefaultAssignKernel( y, x, A, scalar );
// BLAS assignment kernel of the scaled expression for the single precision
// case (enabled via UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>).
// The scalar is passed directly as alpha to cblas_sgemv; beta = 0.0F.
1886 template<
typename VT1
1890 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1891 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1893 using boost::numeric_cast;
1899 const int M ( numeric_cast<int>( A.rows() ) );
1900 const int N ( numeric_cast<int>( A.columns() ) );
1901 const int lda( numeric_cast<int>( A.spacing() ) );
1903 cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
1904 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// BLAS assignment kernel of the scaled expression for the double precision
// case (enabled via UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>).
// The scalar is passed directly as alpha to cblas_dgemv; beta = 0.0.
1924 template<
typename VT1
1928 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
1929 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1931 using boost::numeric_cast;
1937 const int M ( numeric_cast<int>( A.rows() ) );
1938 const int N ( numeric_cast<int>( A.columns() ) );
1939 const int lda( numeric_cast<int>( A.spacing() ) );
1941 cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
1942 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// BLAS assignment kernel of the scaled expression for complex<float> operands
// (enabled via UseSinglePrecisionComplexKernel<VT1,VT2,MT1>).
// The scalar is converted into a complex<float> alpha; beta = (0,0). Both are
// passed to cblas_cgemv by address.
1963 template<
typename VT1
1967 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1968 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
1970 using boost::numeric_cast;
1980 const int M ( numeric_cast<int>( A.rows() ) );
1981 const int N ( numeric_cast<int>( A.columns() ) );
1982 const int lda( numeric_cast<int>( A.spacing() ) );
1983 const complex<float> alpha( scalar );
1984 const complex<float> beta ( 0.0F, 0.0F );
1986 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
1987 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS assignment kernel of the scaled expression for complex<double> operands
// (enabled via UseDoublePrecisionComplexKernel<VT1,VT2,MT1>).
// The scalar is converted into a complex<double> alpha; beta = (0,0). Both are
// passed to cblas_zgemv by address.
2008 template<
typename VT1
2012 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2013 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2015 using boost::numeric_cast;
2025 const int M ( numeric_cast<int>( A.rows() ) );
2026 const int N ( numeric_cast<int>( A.columns() ) );
2027 const int lda( numeric_cast<int>( A.spacing() ) );
2028 const complex<double> alpha( scalar );
2029 const complex<double> beta ( 0.0, 0.0 );
2031 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2032 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2049 template<
typename VT1
// Addition assignment of the scaled vector/matrix product to a dense vector.
// The two operands are extracted from the wrapped expression. An empty right
// operand (zero rows or zero columns) takes an early branch. Otherwise the
// work goes to the default add-assign kernel when the visible part of the
// condition holds (MT is a computation and 'evaluate' is not set; the rest of
// the condition is not part of this excerpt) and to the BLAS add-assign kernel
// otherwise. Both kernels receive rhs.scalar_ as factor.
2076 template<
typename VT1
2078 friend inline void addAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
2082 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2083 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2085 if( right.rows() == 0UL || right.columns() == 0UL ) {
2097 if( ( IsComputation<MT>::value && !evaluate ) ||
2099 DVecScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
2101 DVecScalarMultExpr::selectBlasAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Default addition-assignment kernel of the scaled expression, used when the
// vectorized default kernel is NOT applicable (DisableIf on
// UseVectorizedDefaultKernel): x * A * scalar is handed to y.addAssign().
2119 template<
typename VT1
2123 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2124 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2126 y.addAssign( x * A * scalar );
// Vectorized default addition-assignment kernel of the scaled expression,
// enabled only when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector; the scaled column sum is ADDED to y[j]
// x      : vector operand, read via x.get(i)
// A      : matrix operand with M rows and N columns, read via A.get(i,j)
// scalar : factor applied to each reduced column sum
//
// Columns are handled in groups of 8, 4, 3 and 2, followed by a single-column
// step. Each group keeps one IntrinsicType accumulator per column, walks the
// rows in steps of IT::size, and reduces every accumulator with sum(); the
// scaling happens once per column after the reduction.
// (The declarations of j and x1 lie on lines not part of this excerpt.)
2144 template<
typename VT1
2148 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2149 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2151 typedef IntrinsicTrait<ElementType> IT;
2153 const size_t M( A.rows() );
2154 const size_t N( A.columns() );
// Groups of eight columns.
2158 for( ; (j+8UL) <= N; j+=8UL ) {
2159 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2160 for(
size_t i=0UL; i<M; i+=IT::size ) {
2162 xmm1 = xmm1 + x1 * A.get(i,j );
2163 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2164 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2165 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2166 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2167 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2168 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2169 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2171 y[j ] +=
sum( xmm1 ) * scalar;
2172 y[j+1UL] +=
sum( xmm2 ) * scalar;
2173 y[j+2UL] +=
sum( xmm3 ) * scalar;
2174 y[j+3UL] +=
sum( xmm4 ) * scalar;
2175 y[j+4UL] +=
sum( xmm5 ) * scalar;
2176 y[j+5UL] +=
sum( xmm6 ) * scalar;
2177 y[j+6UL] +=
sum( xmm7 ) * scalar;
2178 y[j+7UL] +=
sum( xmm8 ) * scalar;
// Groups of four columns.
2180 for( ; (j+4UL) <= N; j+=4UL ) {
2182 for(
size_t i=0UL; i<M; i+=IT::size ) {
2184 xmm1 = xmm1 + x1 * A.get(i,j );
2185 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2186 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2187 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2189 y[j ] +=
sum( xmm1 ) * scalar;
2190 y[j+1UL] +=
sum( xmm2 ) * scalar;
2191 y[j+2UL] +=
sum( xmm3 ) * scalar;
2192 y[j+3UL] +=
sum( xmm4 ) * scalar;
// Groups of three columns.
2194 for( ; (j+3UL) <= N; j+=3UL ) {
2196 for(
size_t i=0UL; i<M; i+=IT::size ) {
2198 xmm1 = xmm1 + x1 * A.get(i,j );
2199 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2200 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2202 y[j ] +=
sum( xmm1 ) * scalar;
2203 y[j+1UL] +=
sum( xmm2 ) * scalar;
2204 y[j+2UL] +=
sum( xmm3 ) * scalar;
// Groups of two columns.
2206 for( ; (j+2UL) <= N; j+=2UL ) {
2208 for(
size_t i=0UL; i<M; i+=IT::size ) {
2210 xmm1 = xmm1 + x1 * A.get(i,j );
2211 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2213 y[j ] +=
sum( xmm1 ) * scalar;
2214 y[j+1UL] +=
sum( xmm2 ) * scalar;
// Single column j: accumulates A(i,j) * x(i) over the rows.
2218 for(
size_t i=0UL; i<M; i+=IT::size ) {
2219 xmm1 = xmm1 + A.get(i,j) * x.get(i);
2221 y[j] +=
sum( xmm1 ) * scalar;
// BLAS addition-assignment kernel overload of the scaled expression chosen
// when UseDefaultKernel<VT1,VT2,MT1,ST2> holds: no BLAS routine is used, the
// call is forwarded unchanged to selectDefaultAddAssignKernel.
2240 template<
typename VT1
2244 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2245 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2247 selectDefaultAddAssignKernel( y, x, A, scalar );
// BLAS addition-assignment kernel of the scaled expression for the single
// precision case (enabled via UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>).
// The scalar is passed as alpha to cblas_sgemv; beta = 1.0F keeps the existing
// contents of y so that the scaled product is accumulated onto them.
2266 template<
typename VT1
2270 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2271 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2273 using boost::numeric_cast;
2279 const int M ( numeric_cast<int>( A.rows() ) );
2280 const int N ( numeric_cast<int>( A.columns() ) );
2281 const int lda( numeric_cast<int>( A.spacing() ) );
2283 cblas_sgemv( CblasColMajor, CblasTrans, M, N, scalar,
2284 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS addition-assignment kernel of the scaled expression for the double
// precision case (enabled via UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>).
// The scalar is passed as alpha to cblas_dgemv; beta = 1.0 keeps the existing
// contents of y so that the scaled product is accumulated onto them.
2304 template<
typename VT1
2308 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2309 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2311 using boost::numeric_cast;
2317 const int M ( numeric_cast<int>( A.rows() ) );
2318 const int N ( numeric_cast<int>( A.columns() ) );
2319 const int lda( numeric_cast<int>( A.spacing() ) );
2321 cblas_dgemv( CblasColMajor, CblasTrans, M, N, scalar,
2322 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS addition-assignment kernel of the scaled expression for complex<float>
// operands (enabled via UseSinglePrecisionComplexKernel<VT1,VT2,MT1>).
// The scalar is converted into a complex<float> alpha; beta = (1,0) keeps the
// existing contents of y. Both are passed to cblas_cgemv by address.
2343 template<
typename VT1
2347 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2348 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2350 using boost::numeric_cast;
2360 const int M ( numeric_cast<int>( A.rows() ) );
2361 const int N ( numeric_cast<int>( A.columns() ) );
2362 const int lda( numeric_cast<int>( A.spacing() ) );
2363 const complex<float> alpha( scalar );
2364 const complex<float> beta ( 1.0F, 0.0F );
2366 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2367 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS addition-assignment kernel of the scaled expression for complex<double>
// operands (enabled via UseDoublePrecisionComplexKernel<VT1,VT2,MT1>).
// The scalar is converted into a complex<double> alpha; beta = (1,0) keeps the
// existing contents of y. Both are passed to cblas_zgemv by address.
2388 template<
typename VT1
2392 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2393 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2395 using boost::numeric_cast;
2405 const int M ( numeric_cast<int>( A.rows() ) );
2406 const int N ( numeric_cast<int>( A.columns() ) );
2407 const int lda( numeric_cast<int>( A.spacing() ) );
2408 const complex<double> alpha( scalar );
2409 const complex<double> beta ( 1.0, 0.0 );
2411 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2412 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Subtraction assignment of the scaled vector/matrix product to a dense
// vector. The two operands are extracted from the wrapped expression. An empty
// right operand (zero rows or zero columns) takes an early branch. Otherwise
// the work goes to the default sub-assign kernel when the visible part of the
// condition holds (MT is a computation and 'evaluate' is not set; the rest of
// the condition is not part of this excerpt) and to the BLAS sub-assign kernel
// otherwise. Both kernels receive rhs.scalar_ as factor.
2433 template<
typename VT1
2435 friend inline void subAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
2439 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2440 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2442 if( right.rows() == 0UL || right.columns() == 0UL ) {
2454 if( ( IsComputation<MT>::value && !evaluate ) ||
2456 DVecScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
2458 DVecScalarMultExpr::selectBlasSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Default subtraction-assignment kernel of the scaled expression, used when
// the vectorized default kernel is NOT applicable (DisableIf on
// UseVectorizedDefaultKernel): x * A * scalar is handed to y.subAssign().
2476 template<
typename VT1
2480 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2481 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2483 y.subAssign( x * A * scalar );
// Vectorized default subtraction-assignment kernel of the scaled expression,
// enabled only when UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> holds.
//
// y      : target vector; the scaled column sum is SUBTRACTED from y[j]
// x      : vector operand, read via x.get(i)
// A      : matrix operand with M rows and N columns, read via A.get(i,j)
// scalar : factor applied to each reduced column sum
//
// Columns are handled in groups of 8, 4, 3 and 2, followed by a single-column
// step. Each group keeps one IntrinsicType accumulator per column, walks the
// rows in steps of IT::size, and reduces every accumulator with sum(); the
// scaling happens once per column after the reduction.
// (The declarations of j and x1 lie on lines not part of this excerpt.)
2501 template<
typename VT1
2505 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2506 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2508 typedef IntrinsicTrait<ElementType> IT;
2510 const size_t M( A.rows() );
2511 const size_t N( A.columns() );
// Groups of eight columns.
2515 for( ; (j+8UL) <= N; j+=8UL ) {
2516 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2517 for(
size_t i=0UL; i<M; i+=IT::size ) {
2519 xmm1 = xmm1 + x1 * A.get(i,j );
2520 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2521 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2522 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2523 xmm5 = xmm5 + x1 * A.get(i,j+4UL);
2524 xmm6 = xmm6 + x1 * A.get(i,j+5UL);
2525 xmm7 = xmm7 + x1 * A.get(i,j+6UL);
2526 xmm8 = xmm8 + x1 * A.get(i,j+7UL);
2528 y[j ] -=
sum( xmm1 ) * scalar;
2529 y[j+1UL] -=
sum( xmm2 ) * scalar;
2530 y[j+2UL] -=
sum( xmm3 ) * scalar;
2531 y[j+3UL] -=
sum( xmm4 ) * scalar;
2532 y[j+4UL] -=
sum( xmm5 ) * scalar;
2533 y[j+5UL] -=
sum( xmm6 ) * scalar;
2534 y[j+6UL] -=
sum( xmm7 ) * scalar;
2535 y[j+7UL] -=
sum( xmm8 ) * scalar;
// Groups of four columns.
2537 for( ; (j+4UL) <= N; j+=4UL ) {
2539 for(
size_t i=0UL; i<M; i+=IT::size ) {
2541 xmm1 = xmm1 + x1 * A.get(i,j );
2542 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2543 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2544 xmm4 = xmm4 + x1 * A.get(i,j+3UL);
2546 y[j ] -=
sum( xmm1 ) * scalar;
2547 y[j+1UL] -=
sum( xmm2 ) * scalar;
2548 y[j+2UL] -=
sum( xmm3 ) * scalar;
2549 y[j+3UL] -=
sum( xmm4 ) * scalar;
// Groups of three columns.
2551 for( ; (j+3UL) <= N; j+=3UL ) {
2553 for(
size_t i=0UL; i<M; i+=IT::size ) {
2555 xmm1 = xmm1 + x1 * A.get(i,j );
2556 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2557 xmm3 = xmm3 + x1 * A.get(i,j+2UL);
2559 y[j ] -=
sum( xmm1 ) * scalar;
2560 y[j+1UL] -=
sum( xmm2 ) * scalar;
2561 y[j+2UL] -=
sum( xmm3 ) * scalar;
// Groups of two columns.
2563 for( ; (j+2UL) <= N; j+=2UL ) {
2565 for(
size_t i=0UL; i<M; i+=IT::size ) {
2567 xmm1 = xmm1 + x1 * A.get(i,j );
2568 xmm2 = xmm2 + x1 * A.get(i,j+1UL);
2570 y[j ] -=
sum( xmm1 ) * scalar;
2571 y[j+1UL] -=
sum( xmm2 ) * scalar;
// Single column j: accumulates A(i,j) * x(i) over the rows.
2575 for(
size_t i=0UL; i<M; i+=IT::size ) {
2576 xmm1 = xmm1 + A.get(i,j) * x.get(i);
2578 y[j] -=
sum( xmm1 ) * scalar;
// Fallback overload of the BLAS subtraction assignment kernel selection.
//
// Enabled when UseDefaultKernel<VT1,VT2,MT1,ST2> is satisfied; in that case no BLAS routine
// is called and the work is forwarded unchanged to selectDefaultSubAssignKernel.
//
// \param y The target vector.
// \param x The vector operand of the product.
// \param A The matrix operand of the product.
// \param scalar The scaling factor applied to the product.
2598 template<
typename VT1
2602 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2603 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2605 selectDefaultSubAssignKernel( y, x, A, scalar );
// BLAS-based subtraction assignment of the scaled vector/matrix product via cblas_sgemv:
// y -= scalar * ( x * A ).
//
// Only participates in overload resolution when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2>
// is satisfied (EnableIf).
//
// \param y The target vector; it is read and overwritten through y.data().
// \param x The vector operand of the product.
// \param A The matrix operand of the product.
// \param scalar The scaling factor applied to the product.
2624 template<
typename VT1
2628 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2629 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2631 using boost::numeric_cast;
// The CBLAS interface takes dimensions as int; numeric_cast converts them with a range check.
2637 const int M ( numeric_cast<int>( A.rows() ) );
2638 const int N ( numeric_cast<int>( A.columns() ) );
2639 const int lda( numeric_cast<int>( A.spacing() ) );
// The subtraction is expressed through the gemv update y := alpha * A^T * x + beta * y
// with alpha = -scalar and beta = 1.
2641 cblas_sgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2642 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS-based subtraction assignment of the scaled vector/matrix product via cblas_dgemv:
// y -= scalar * ( x * A ).
//
// Only participates in overload resolution when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2>
// is satisfied (EnableIf).
//
// \param y The target vector; it is read and overwritten through y.data().
// \param x The vector operand of the product.
// \param A The matrix operand of the product.
// \param scalar The scaling factor applied to the product.
2662 template<
typename VT1
2666 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2667 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2669 using boost::numeric_cast;
// The CBLAS interface takes dimensions as int; numeric_cast converts them with a range check.
2675 const int M ( numeric_cast<int>( A.rows() ) );
2676 const int N ( numeric_cast<int>( A.columns() ) );
2677 const int lda( numeric_cast<int>( A.spacing() ) );
// The subtraction is expressed through the gemv update y := alpha * A^T * x + beta * y
// with alpha = -scalar and beta = 1.
2679 cblas_dgemv( CblasColMajor, CblasTrans, M, N, -scalar,
2680 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS-based subtraction assignment of the scaled vector/matrix product for single precision
// complex operands via cblas_cgemv: y -= scalar * ( x * A ).
//
// Only participates in overload resolution when UseSinglePrecisionComplexKernel<VT1,VT2,MT1>
// is satisfied (EnableIf).
//
// \param y The target vector; it is read and overwritten through y.data().
// \param x The vector operand of the product.
// \param A The matrix operand of the product.
// \param scalar The scaling factor applied to the product.
2702 template<
typename VT1
2706 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2707 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2709 using boost::numeric_cast;
// The CBLAS interface takes dimensions as int; numeric_cast converts them with a range check.
2719 const int M ( numeric_cast<int>( A.rows() ) );
2720 const int N ( numeric_cast<int>( A.columns() ) );
2721 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv routines take alpha and beta by address, hence the named constants.
// alpha = -scalar turns the gemv accumulation into a subtraction; beta = (1,0) preserves y.
2722 const complex<float> alpha( -scalar );
2723 const complex<float> beta ( 1.0F, 0.0F );
// gemv with CblasTrans computes y := alpha * A^T * x + beta * y on the column-major data of A.
2725 cblas_cgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2726 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS-based subtraction assignment of the scaled vector/matrix product for double precision
// complex operands via cblas_zgemv: y -= scalar * ( x * A ).
//
// Only participates in overload resolution when UseDoublePrecisionComplexKernel<VT1,VT2,MT1>
// is satisfied (EnableIf).
//
// \param y The target vector; it is read and overwritten through y.data().
// \param x The vector operand of the product.
// \param A The matrix operand of the product.
// \param scalar The scaling factor applied to the product.
2748 template<
typename VT1
2752 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2753 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2755 using boost::numeric_cast;
// The CBLAS interface takes dimensions as int; numeric_cast converts them with a range check.
2765 const int M ( numeric_cast<int>( A.rows() ) );
2766 const int N ( numeric_cast<int>( A.columns() ) );
2767 const int lda( numeric_cast<int>( A.spacing() ) );
// The complex gemv routines take alpha and beta by address, hence the named constants.
// alpha = -scalar turns the gemv accumulation into a subtraction; beta = (1,0) preserves y.
2768 const complex<double> alpha( -scalar );
2769 const complex<double> beta ( 1.0, 0.0 );
// gemv with CblasTrans computes y := alpha * A^T * x + beta * y on the column-major data of A.
2771 cblas_zgemv( CblasColMajor, CblasTrans, M, N, &alpha,
2772 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Multiplication assignment of a scaled vector/matrix product expression to a dense vector.
//
// \param lhs The target dense vector.
// \param rhs The scaled product expression that acts as the right-hand side operand.
2793 template<
typename VT1
2795 friend inline void multAssign( DenseVector<VT1,TF>& lhs,
const DVecScalarMultExpr& rhs )
2865 template<
typename T1
2870 if( (~vec).
size() != (~mat).
rows() )
2871 throw std::invalid_argument(
"Vector and matrix sizes do not match" );