35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
111 template<
typename MT
113 class TDMatDVecMultExpr :
public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
114 ,
private MatVecMultExpr
115 ,
private Computation
// Compile-time helper: 'value' is nonzero when either operand is flagged for
// evaluation (evaluateMatrix or evaluateVector). The template parameter T1 does
// not appear in the visible condition.
144 template<
typename T1 >
145 struct UseSMPAssign {
146 enum { value = ( evaluateMatrix || evaluateVector ) };
157 template<
typename T1,
typename T2,
typename T3 >
158 struct UseSinglePrecisionKernel {
164 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
178 template<
typename T1,
typename T2,
typename T3 >
179 struct UseDoublePrecisionKernel {
185 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
199 template<
typename T1,
typename T2,
typename T3 >
200 struct UseSinglePrecisionComplexKernel {
201 typedef complex<float> Type;
207 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
221 template<
typename T1,
typename T2,
typename T3 >
222 struct UseDoublePrecisionComplexKernel {
223 typedef complex<double> Type;
229 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time helper selecting the non-BLAS fallback: 'value' is nonzero when
// BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel traits
// (single/double precision, real/complex) applies to the type triple <T1,T2,T3>.
242 template<
typename T1,
typename T2,
typename T3 >
243 struct UseDefaultKernel {
244 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
245 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
246 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
247 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
258 template<
typename T1,
typename T2,
typename T3 >
259 struct UseVectorizedDefaultKernel {
261 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
296 MT::vectorizable && VT::vectorizable &&
// smpAssignable is nonzero only when neither operand is flagged for evaluation
// (evaluateMatrix / evaluateVector both false) and both MT and VT report
// smpAssignable themselves.
302 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
303 !evaluateVector && VT::smpAssignable };
331 mat_.columns() == 0UL )
335 return mat_(index,index) *
vec_[index];
342 :(
mat_.columns() ) );
345 const size_t jnum( jend - jbegin );
346 const size_t jpos( jbegin + ( ( jnum - 1UL ) &
size_t(-2) ) + 1UL );
348 ElementType res(
mat_(index,jbegin) *
vec_[jbegin] );
350 for(
size_t j=jbegin+1UL; j<jpos; j+=2UL ) {
354 res +=
mat_(index,jpos) *
vec_[jpos];
397 template<
typename T >
399 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
409 template<
typename T >
411 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
421 return mat_.isAligned() &&
vec_.isAligned();
457 template<
typename VT1 >
464 if( rhs.mat_.rows() == 0UL ) {
467 else if( rhs.mat_.columns() == 0UL ) {
472 LT A(
serial( rhs.mat_ ) );
473 RT x(
serial( rhs.vec_ ) );
480 TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
496 template<
typename VT1
499 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
504 selectSmallAssignKernel( y, A, x );
506 selectBlasAssignKernel( y, A, x );
525 template<
typename VT1
528 static inline void selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
530 const size_t M( A.rows() );
531 const size_t N( A.columns() );
533 if( IsStrictlyLower<MT1>::value ) {
537 if( !IsUpper<MT1>::value )
539 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
540 y[i] = A(i,0UL) * x[0UL];
544 for(
size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
546 if( IsDiagonal<MT1>::value )
548 y[j] = A(j,j) * x[j];
552 const size_t ibegin( ( IsLower<MT1>::value )
553 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
555 const size_t iend( ( IsUpper<MT1>::value )
556 ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
560 const size_t inum( iend - ibegin );
561 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
563 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
564 y[i ] += A(i ,j) * x[j];
565 y[i+1UL] += A(i+1UL,j) * x[j];
568 y[ipos] += A(ipos,j) * x[j];
570 if( IsUpper<MT1>::value ) {
571 y[iend] = A(iend,j) * x[j];
576 if( IsStrictlyUpper<MT1>::value ) {
597 template<
typename VT1
600 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
601 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
603 selectDefaultAssignKernel( y, A, x );
622 template<
typename VT1
625 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
626 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
628 typedef IntrinsicTrait<ElementType> IT;
630 const size_t M( A.rows() );
631 const size_t N( A.columns() );
637 const size_t jbegin( ( IsUpper<MT1>::value )
638 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
640 const size_t jend( ( IsLower<MT1>::value )
641 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
645 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
647 for(
size_t j=jbegin; j<jend; ++j ) {
648 const IntrinsicType x1(
set( x[j] ) );
649 xmm1 = xmm1 + A.load(i ,j) * x1;
650 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
651 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
652 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
653 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
654 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
655 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
656 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
671 const size_t jbegin( ( IsUpper<MT1>::value )
672 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
674 const size_t jend( ( IsLower<MT1>::value )
675 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
679 IntrinsicType xmm1, xmm2, xmm3, xmm4;
681 for(
size_t j=jbegin; j<jend; ++j ) {
682 const IntrinsicType x1(
set( x[j] ) );
683 xmm1 = xmm1 + A.load(i ,j) * x1;
684 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
685 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
686 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
697 const size_t jbegin( ( IsUpper<MT1>::value )
698 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
700 const size_t jend( ( IsLower<MT1>::value )
701 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
705 IntrinsicType xmm1, xmm2, xmm3;
707 for(
size_t j=jbegin; j<jend; ++j ) {
708 const IntrinsicType x1(
set( x[j] ) );
709 xmm1 = xmm1 + A.load(i ,j) * x1;
710 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
711 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
721 const size_t jbegin( ( IsUpper<MT1>::value )
722 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
724 const size_t jend( ( IsLower<MT1>::value )
725 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
729 IntrinsicType xmm1, xmm2;
731 for(
size_t j=jbegin; j<jend; ++j ) {
732 const IntrinsicType x1(
set( x[j] ) );
733 xmm1 = xmm1 + A.load(i ,j) * x1;
734 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
743 const size_t jbegin( ( IsUpper<MT1>::value )
744 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
746 const size_t jend( ( IsLower<MT1>::value )
747 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
753 for(
size_t j=jbegin; j<jend; ++j ) {
754 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
777 template<
typename VT1
780 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
781 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
783 selectDefaultAssignKernel( y, A, x );
802 template<
typename VT1
805 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
806 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
808 typedef IntrinsicTrait<ElementType> IT;
810 const size_t M( A.rows() );
811 const size_t N( A.columns() );
813 const size_t iblock( 32768UL /
sizeof( ElementType ) );
814 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
820 for(
size_t ii=0U; ii<M; ii+=iblock ) {
821 for(
size_t jj=0UL; jj<N; jj+=jblock )
823 const size_t jend(
min( jj+jblock, N ) );
824 const size_t itmp(
min( ii+iblock, M ) );
825 const size_t iend( ( IsUpper<MT1>::value )
826 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
829 size_t i( ( IsLower<MT1>::value )
830 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
835 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
837 for(
size_t j=jj; j<jend; ++j ) {
838 const IntrinsicType x1(
set( x[j] ) );
839 xmm1 = xmm1 + A.load(i ,j) * x1;
840 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
841 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
842 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
843 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
844 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
845 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
846 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
849 y.store( i , y.load(i ) + xmm1 );
861 IntrinsicType xmm1, xmm2, xmm3, xmm4;
863 for(
size_t j=jj; j<jend; ++j ) {
864 const IntrinsicType x1(
set( x[j] ) );
865 xmm1 = xmm1 + A.load(i ,j) * x1;
866 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
867 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
868 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
871 y.store( i , y.load(i ) + xmm1 );
879 IntrinsicType xmm1, xmm2, xmm3;
881 for(
size_t j=jj; j<jend; ++j ) {
882 const IntrinsicType x1(
set( x[j] ) );
883 xmm1 = xmm1 + A.load(i ,j) * x1;
884 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
885 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
888 y.store( i , y.load(i ) + xmm1 );
895 IntrinsicType xmm1, xmm2;
897 for(
size_t j=jj; j<jend; ++j ) {
898 const IntrinsicType x1(
set( x[j] ) );
899 xmm1 = xmm1 + A.load(i ,j) * x1;
900 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
903 y.store( i , y.load(i ) + xmm1 );
911 for(
size_t j=jj; j<jend; ++j ) {
912 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
915 y.store( i, y.load(i) + xmm1 );
937 template<
typename VT1
940 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
941 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
943 selectLargeAssignKernel( y, A, x );
963 template<
typename VT1
966 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
967 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
969 if( IsTriangular<MT1>::value ) {
971 strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
974 sgemv( y, A, x, 1.0F, 0.0F );
996 template<
typename VT1
999 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1000 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1002 if( IsTriangular<MT1>::value ) {
1004 dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1007 dgemv( y, A, x, 1.0, 0.0 );
1029 template<
typename VT1
1032 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1033 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1035 if( IsTriangular<MT1>::value ) {
1037 ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1040 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1062 template<
typename VT1
1065 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1066 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1068 if( IsTriangular<MT1>::value ) {
1070 ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1073 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1093 template<
typename VT1 >
1104 const ResultType tmp(
serial( rhs ) );
1123 template<
typename VT1 >
1130 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1134 LT A(
serial( rhs.mat_ ) );
1135 RT x(
serial( rhs.vec_ ) );
1142 TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1158 template<
typename VT1
1161 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1163 if( ( IsDiagonal<MT1>::value ) ||
1164 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1166 selectSmallAddAssignKernel( y, A, x );
1168 selectBlasAddAssignKernel( y, A, x );
1187 template<
typename VT1
1190 static inline void selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1192 const size_t M( A.rows() );
1193 const size_t N( A.columns() );
1195 for(
size_t j=0UL; j<N; ++j )
1197 if( IsDiagonal<MT1>::value )
1199 y[j] += A(j,j) * x[j];
1203 const size_t ibegin( ( IsLower<MT1>::value )
1204 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1206 const size_t iend( ( IsUpper<MT1>::value )
1207 ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1211 const size_t inum( iend - ibegin );
1212 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
1214 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
1215 y[i ] += A(i ,j) * x[j];
1216 y[i+1UL] += A(i+1UL,j) * x[j];
1219 y[ipos] += A(ipos,j) * x[j];
1241 template<
typename VT1
1244 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1245 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1247 selectDefaultAddAssignKernel( y, A, x );
1266 template<
typename VT1
1269 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1270 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1272 typedef IntrinsicTrait<ElementType> IT;
1274 const size_t M( A.rows() );
1275 const size_t N( A.columns() );
1281 const size_t jbegin( ( IsUpper<MT1>::value )
1282 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1284 const size_t jend( ( IsLower<MT1>::value )
1285 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1289 IntrinsicType xmm1( y.load(i ) );
1290 IntrinsicType xmm2( y.load(i+
IT::size ) );
1291 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1292 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1293 IntrinsicType xmm5( y.load(i+
IT::size*4UL) );
1294 IntrinsicType xmm6( y.load(i+
IT::size*5UL) );
1295 IntrinsicType xmm7( y.load(i+
IT::size*6UL) );
1296 IntrinsicType xmm8( y.load(i+
IT::size*7UL) );
1298 for(
size_t j=jbegin; j<jend; ++j ) {
1299 const IntrinsicType x1(
set( x[j] ) );
1300 xmm1 = xmm1 + A.load(i ,j) * x1;
1301 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1302 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1303 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1304 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1305 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1306 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1307 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
1310 y.store( i , xmm1 );
1322 const size_t jbegin( ( IsUpper<MT1>::value )
1323 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1325 const size_t jend( ( IsLower<MT1>::value )
1326 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1330 IntrinsicType xmm1( y.load(i ) );
1331 IntrinsicType xmm2( y.load(i+
IT::size ) );
1332 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1333 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1335 for(
size_t j=jbegin; j<jend; ++j ) {
1336 const IntrinsicType x1(
set( x[j] ) );
1337 xmm1 = xmm1 + A.load(i ,j) * x1;
1338 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1339 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1340 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1343 y.store( i , xmm1 );
1351 const size_t jbegin( ( IsUpper<MT1>::value )
1352 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1354 const size_t jend( ( IsLower<MT1>::value )
1355 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1359 IntrinsicType xmm1( y.load(i ) );
1360 IntrinsicType xmm2( y.load(i+
IT::size ) );
1361 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1363 for(
size_t j=jbegin; j<jend; ++j ) {
1364 const IntrinsicType x1(
set( x[j] ) );
1365 xmm1 = xmm1 + A.load(i ,j) * x1;
1366 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1367 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1370 y.store( i , xmm1 );
1377 const size_t jbegin( ( IsUpper<MT1>::value )
1378 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1380 const size_t jend( ( IsLower<MT1>::value )
1381 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1385 IntrinsicType xmm1( y.load(i ) );
1386 IntrinsicType xmm2( y.load(i+
IT::size) );
1388 for(
size_t j=jbegin; j<jend; ++j ) {
1389 const IntrinsicType x1(
set( x[j] ) );
1390 xmm1 = xmm1 + A.load(i ,j) * x1;
1391 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
1394 y.store( i , xmm1 );
1400 const size_t jbegin( ( IsUpper<MT1>::value )
1401 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1403 const size_t jend( ( IsLower<MT1>::value )
1404 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1408 IntrinsicType xmm1( y.load(i) );
1410 for(
size_t j=jbegin; j<jend; ++j ) {
1411 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
1434 template<
typename VT1
1437 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1438 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1440 selectDefaultAddAssignKernel( y, A, x );
1459 template<
typename VT1
1462 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1463 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1465 typedef IntrinsicTrait<ElementType> IT;
1467 const size_t M( A.rows() );
1468 const size_t N( A.columns() );
1470 const size_t iblock( 32768UL /
sizeof( ElementType ) );
1471 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1475 for(
size_t ii=0U; ii<M; ii+=iblock ) {
1476 for(
size_t jj=0UL; jj<N; jj+=jblock )
1478 const size_t jend(
min( jj+jblock, N ) );
1479 const size_t itmp(
min( ii+iblock, M ) );
1480 const size_t iend( ( IsUpper<MT1>::value )
1481 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1484 size_t i( ( IsLower<MT1>::value )
1485 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
1490 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1492 for(
size_t j=jj; j<jend; ++j ) {
1493 const IntrinsicType x1(
set( x[j] ) );
1494 xmm1 = xmm1 + A.load(i ,j) * x1;
1495 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1496 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1497 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1498 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1499 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1500 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1501 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
1504 y.store( i , y.load(i ) + xmm1 );
1516 IntrinsicType xmm1, xmm2, xmm3, xmm4;
1518 for(
size_t j=jj; j<jend; ++j ) {
1519 const IntrinsicType x1(
set( x[j] ) );
1520 xmm1 = xmm1 + A.load(i ,j) * x1;
1521 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1522 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1523 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1526 y.store( i , y.load(i ) + xmm1 );
1534 IntrinsicType xmm1, xmm2, xmm3;
1536 for(
size_t j=jj; j<jend; ++j ) {
1537 const IntrinsicType x1(
set( x[j] ) );
1538 xmm1 = xmm1 + A.load(i ,j) * x1;
1539 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1540 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1543 y.store( i , y.load(i ) + xmm1 );
1550 IntrinsicType xmm1, xmm2;
1552 for(
size_t j=jj; j<jend; ++j ) {
1553 const IntrinsicType x1(
set( x[j] ) );
1554 xmm1 = xmm1 + A.load(i ,j) * x1;
1555 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
1558 y.store( i , y.load(i ) + xmm1 );
1566 for(
size_t j=jj; j<jend; ++j ) {
1567 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
1570 y.store( i, y.load(i) + xmm1 );
1592 template<
typename VT1
1595 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
1596 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1598 selectLargeAddAssignKernel( y, A, x );
1618 template<
typename VT1
1621 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
1622 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1624 if( IsTriangular<MT1>::value ) {
1626 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1630 sgemv( y, A, x, 1.0F, 1.0F );
1652 template<
typename VT1
1655 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
1656 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1658 if( IsTriangular<MT1>::value ) {
1660 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1664 dgemv( y, A, x, 1.0, 1.0 );
1686 template<
typename VT1
1689 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1690 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1692 if( IsTriangular<MT1>::value ) {
1694 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1698 cgemv( y, A, x, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1720 template<
typename VT1
1723 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
1724 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1726 if( IsTriangular<MT1>::value ) {
1728 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1732 zgemv( y, A, x, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1756 template<
typename VT1 >
1763 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1767 LT A(
serial( rhs.mat_ ) );
1768 RT x(
serial( rhs.vec_ ) );
1775 TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1791 template<
typename VT1
1794 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1796 if( ( IsDiagonal<MT1>::value ) ||
1797 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1799 selectSmallSubAssignKernel( y, A, x );
1801 selectBlasSubAssignKernel( y, A, x );
1820 template<
typename VT1
1823 static inline void selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1825 const size_t M( A.rows() );
1826 const size_t N( A.columns() );
1828 for(
size_t j=0UL; j<N; ++j )
1830 if( IsDiagonal<MT1>::value )
1832 y[j] -= A(j,j) * x[j];
1836 const size_t ibegin( ( IsLower<MT1>::value )
1837 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1839 const size_t iend( ( IsUpper<MT1>::value )
1840 ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1844 const size_t inum( iend - ibegin );
1845 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
1847 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
1848 y[i ] -= A(i ,j) * x[j];
1849 y[i+1UL] -= A(i+1UL,j) * x[j];
1852 y[ipos] -= A(ipos,j) * x[j];
1874 template<
typename VT1
1877 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1878 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1880 selectDefaultSubAssignKernel( y, A, x );
1900 template<
typename VT1
1903 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1904 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1906 typedef IntrinsicTrait<ElementType> IT;
1908 const size_t M( A.rows() );
1909 const size_t N( A.columns() );
1915 const size_t jbegin( ( IsUpper<MT1>::value )
1916 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1918 const size_t jend( ( IsLower<MT1>::value )
1919 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1923 IntrinsicType xmm1( y.load(i ) );
1924 IntrinsicType xmm2( y.load(i+
IT::size ) );
1925 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1926 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1927 IntrinsicType xmm5( y.load(i+
IT::size*4UL) );
1928 IntrinsicType xmm6( y.load(i+
IT::size*5UL) );
1929 IntrinsicType xmm7( y.load(i+
IT::size*6UL) );
1930 IntrinsicType xmm8( y.load(i+
IT::size*7UL) );
1932 for(
size_t j=jbegin; j<jend; ++j ) {
1933 const IntrinsicType x1(
set( x[j] ) );
1934 xmm1 = xmm1 - A.load(i ,j) * x1;
1935 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1936 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1937 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1938 xmm5 = xmm5 - A.load(i+
IT::size*4UL,j) * x1;
1939 xmm6 = xmm6 - A.load(i+
IT::size*5UL,j) * x1;
1940 xmm7 = xmm7 - A.load(i+
IT::size*6UL,j) * x1;
1941 xmm8 = xmm8 - A.load(i+
IT::size*7UL,j) * x1;
1944 y.store( i , xmm1 );
1956 const size_t jbegin( ( IsUpper<MT1>::value )
1957 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1959 const size_t jend( ( IsLower<MT1>::value )
1960 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1964 IntrinsicType xmm1( y.load(i ) );
1965 IntrinsicType xmm2( y.load(i+
IT::size ) );
1966 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1967 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1969 for(
size_t j=jbegin; j<jend; ++j ) {
1970 const IntrinsicType x1(
set( x[j] ) );
1971 xmm1 = xmm1 - A.load(i ,j) * x1;
1972 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1973 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1974 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1977 y.store( i , xmm1 );
1985 const size_t jbegin( ( IsUpper<MT1>::value )
1986 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1988 const size_t jend( ( IsLower<MT1>::value )
1989 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1993 IntrinsicType xmm1( y.load(i ) );
1994 IntrinsicType xmm2( y.load(i+
IT::size ) );
1995 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1997 for(
size_t j=jbegin; j<jend; ++j ) {
1998 const IntrinsicType x1(
set( x[j] ) );
1999 xmm1 = xmm1 - A.load(i ,j) * x1;
2000 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
2001 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
2004 y.store( i , xmm1 );
2011 const size_t jbegin( ( IsUpper<MT1>::value )
2012 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2014 const size_t jend( ( IsLower<MT1>::value )
2015 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2019 IntrinsicType xmm1( y.load(i ) );
2020 IntrinsicType xmm2( y.load(i+
IT::size) );
2022 for(
size_t j=jbegin; j<jend; ++j ) {
2023 const IntrinsicType x1(
set( x[j] ) );
2024 xmm1 = xmm1 - A.load(i ,j) * x1;
2025 xmm2 = xmm2 - A.load(i+
IT::size,j) * x1;
2028 y.store( i , xmm1 );
2034 const size_t jbegin( ( IsUpper<MT1>::value )
2035 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2037 const size_t jend( ( IsLower<MT1>::value )
2038 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2042 IntrinsicType xmm1( y.load(i) );
2044 for(
size_t j=jbegin; j<jend; ++j ) {
2045 xmm1 = xmm1 - A.load(i,j) *
set( x[j] );
2068 template<
typename VT1
2071 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2072 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2074 selectDefaultSubAssignKernel( y, A, x );
2094 template<
typename VT1
2097 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
2098 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2100 typedef IntrinsicTrait<ElementType> IT;
2102 const size_t M( A.rows() );
2103 const size_t N( A.columns() );
2105 const size_t iblock( 32768UL /
sizeof( ElementType ) );
2106 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
2110 for(
size_t ii=0U; ii<M; ii+=iblock ) {
2111 for(
size_t jj=0UL; jj<N; jj+=jblock )
2113 const size_t jend(
min( jj+jblock, N ) );
2114 const size_t itmp(
min( ii+iblock, M ) );
2115 const size_t iend( ( IsUpper<MT1>::value )
2116 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
2119 size_t i( ( IsLower<MT1>::value )
2120 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
2125 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2127 for(
size_t j=jj; j<jend; ++j ) {
2128 const IntrinsicType x1(
set( x[j] ) );
2129 xmm1 = xmm1 + A.load(i ,j) * x1;
2130 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2131 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2132 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2133 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
2134 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
2135 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
2136 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
2139 y.store( i , y.load(i ) - xmm1 );
2151 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2153 for(
size_t j=jj; j<jend; ++j ) {
2154 const IntrinsicType x1(
set( x[j] ) );
2155 xmm1 = xmm1 + A.load(i ,j) * x1;
2156 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2157 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2158 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2161 y.store( i , y.load(i ) - xmm1 );
2169 IntrinsicType xmm1, xmm2, xmm3;
2171 for(
size_t j=jj; j<jend; ++j ) {
2172 const IntrinsicType x1(
set( x[j] ) );
2173 xmm1 = xmm1 + A.load(i ,j) * x1;
2174 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2175 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2178 y.store( i , y.load(i ) - xmm1 );
2185 IntrinsicType xmm1, xmm2;
2187 for(
size_t j=jj; j<jend; ++j ) {
2188 const IntrinsicType x1(
set( x[j] ) );
2189 xmm1 = xmm1 + A.load(i ,j) * x1;
2190 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
2193 y.store( i , y.load(i ) - xmm1 );
2201 for(
size_t j=jj; j<jend; ++j ) {
2202 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
2205 y.store( i, y.load(i) - xmm1 );
2227 template<
typename VT1
2230 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
2231 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2233 selectLargeSubAssignKernel( y, A, x );
2253 template<
typename VT1
2256 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2> >::Type
2257 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2259 if( IsTriangular<MT1>::value ) {
2261 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2265 sgemv( y, A, x, -1.0F, 1.0F );
2287 template<
typename VT1
2290 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2> >::Type
2291 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2293 if( IsTriangular<MT1>::value ) {
2295 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2299 dgemv( y, A, x, -1.0, 1.0 );
2321 template<
typename VT1
2324 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2325 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2327 if( IsTriangular<MT1>::value ) {
2329 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2333 cgemv( y, A, x, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2355 template<
typename VT1
2358 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
2359 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2361 if( IsTriangular<MT1>::value ) {
2363 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2367 zgemv( y, A, x, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2391 template<
typename VT1 >
2402 const ResultType tmp(
serial( rhs ) );
2427 template<
typename VT1 >
2428 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2435 if( rhs.mat_.rows() == 0UL ) {
2438 else if( rhs.mat_.columns() == 0UL ) {
2471 template<
typename VT1 >
2472 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2483 const ResultType tmp( rhs );
2504 template<
typename VT1 >
2505 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2512 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2548 template<
typename VT1 >
2549 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2556 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2592 template<
typename VT1 >
2593 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2604 const ResultType tmp( rhs );
// Specialization of DVecScalarMultExpr for a scaled TDMatDVecMultExpr<MT,VT> operand
// (see the DenseVector base class argument below). It derives publicly from DenseVector
// and privately from the tag classes VecScalarMultExpr and Computation.
// NOTE(review): the remaining template parameters and the class name line are not
// visible in this excerpt.
2644 template<
typename MT
2648 :
public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
2649 ,
private VecScalarMultExpr
2650 ,
private Computation
// Shorthand for the wrapped matrix/vector multiplication expression type.
2654 typedef TDMatDVecMultExpr<MT,VT> MVM;
// evaluateMatrix is set if MT requires evaluation, or if MT is a computation whose
// element type MET equals VET and is BLAS compatible.
2666 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2667 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// evaluateVector is set if VT is a computation or requires evaluation.
2672 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// Compile-time switch used as EnableIf condition by the smp* friend functions: 'value'
// is set whenever either operand has to be evaluated (evaluateMatrix/evaluateVector).
// The template parameter T1 does not enter the visible expression.
2680 template<
typename T1 >
2681 struct UseSMPAssign {
2682 enum { value = ( evaluateMatrix || evaluateVector ) };
// Compile-time predicate selecting the single precision BLAS kernels.
// Visible conditions: T1 offers mutable data access, T2 and T3 offer const data access,
// T2 is not diagonal, all three types are vectorizable, all three element types satisfy
// IsFloat, and the scalar type T4 is not complex.
// NOTE(review): the beginning of the enumerator expression is on a line that is not
// part of this excerpt.
2691 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2692 struct UseSinglePrecisionKernel {
2694 HasMutableDataAccess<T1>::value &&
2695 HasConstDataAccess<T2>::value &&
2696 HasConstDataAccess<T3>::value &&
2697 !IsDiagonal<T2>::value &&
2698 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2699 IsFloat<typename T1::ElementType>::value &&
2700 IsFloat<typename T2::ElementType>::value &&
2701 IsFloat<typename T3::ElementType>::value &&
2702 !IsComplex<T4>::value };
// Compile-time predicate selecting the double precision BLAS kernels.
// Visible conditions: T1 offers mutable data access, T2 and T3 offer const data access,
// T2 is not diagonal, all three types are vectorizable, all three element types satisfy
// IsDouble, and the scalar type T4 is not complex.
// NOTE(review): the beginning of the enumerator expression is on a line that is not
// part of this excerpt.
2711 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2712 struct UseDoublePrecisionKernel {
2714 HasMutableDataAccess<T1>::value &&
2715 HasConstDataAccess<T2>::value &&
2716 HasConstDataAccess<T3>::value &&
2717 !IsDiagonal<T2>::value &&
2718 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2719 IsDouble<typename T1::ElementType>::value &&
2720 IsDouble<typename T2::ElementType>::value &&
2721 IsDouble<typename T3::ElementType>::value &&
2722 !IsComplex<T4>::value };
// Compile-time predicate selecting the single precision complex BLAS kernels.
// Visible conditions: T1 offers mutable data access, T2 and T3 offer const data access,
// T2 is not diagonal, all three types are vectorizable, and all three element types are
// exactly complex<float> (the nested typedef 'Type').
// NOTE(review): the beginning of the enumerator expression is on a line that is not
// part of this excerpt.
2731 template<
typename T1,
typename T2,
typename T3 >
2732 struct UseSinglePrecisionComplexKernel {
2733 typedef complex<float> Type;
2735 HasMutableDataAccess<T1>::value &&
2736 HasConstDataAccess<T2>::value &&
2737 HasConstDataAccess<T3>::value &&
2738 !IsDiagonal<T2>::value &&
2739 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2740 IsSame<typename T1::ElementType,Type>::value &&
2741 IsSame<typename T2::ElementType,Type>::value &&
2742 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the double precision complex BLAS kernels.
// Visible conditions: T1 offers mutable data access, T2 and T3 offer const data access,
// T2 is not diagonal, all three types are vectorizable, and all three element types are
// exactly complex<double> (the nested typedef 'Type').
// NOTE(review): the beginning of the enumerator expression is on a line that is not
// part of this excerpt.
2751 template<
typename T1,
typename T2,
typename T3 >
2752 struct UseDoublePrecisionComplexKernel {
2753 typedef complex<double> Type;
2755 HasMutableDataAccess<T1>::value &&
2756 HasConstDataAccess<T2>::value &&
2757 HasConstDataAccess<T3>::value &&
2758 !IsDiagonal<T2>::value &&
2759 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2760 IsSame<typename T1::ElementType,Type>::value &&
2761 IsSame<typename T2::ElementType,Type>::value &&
2762 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback: 'value' is set if
// BLAZE_BLAS_MODE is off, or if none of the four BLAS kernel predicates applies.
// The two real-valued predicates take the scalar type T4, the complex ones do not.
2770 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2771 struct UseDefaultKernel {
2772 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2773 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2774 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2775 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsics-based default kernels: T2 must not be
// diagonal, all three operand types must be vectorizable, the element types of T1, T2
// and T3 as well as the scalar type T4 must be identical, and IntrinsicTrait must report
// both addition and multiplication for that element type.
2784 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2785 struct UseVectorizedDefaultKernel {
2786 enum { value = !IsDiagonal<T2>::value &&
2787 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2788 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2789 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2790 IsSame<typename T1::ElementType,T4>::value &&
2791 IntrinsicTrait<typename T1::ElementType>::addition &&
2792 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type definitions and compile-time flags of the expression.
// NOTE(review): RES, ElementType, MRT, MCT, VRT, VCT, MET and VET are defined on lines
// that are not part of this excerpt.

// Type of this expression instance.
2798 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// Result type: MultTrait applied to RES and the scalar type.
2799 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic (SIMD) type associated with the element type.
2802 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left-hand side operand is the (const) matrix/vector multiplication expression.
2807 typedef const TDMatDVecMultExpr<MT,VT>
LeftOperand;
// LT is 'const MRT' if the matrix has to be evaluated, MCT otherwise.
2813 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
LT;
// RT is 'const VRT' if the vector has to be evaluated, VCT otherwise.
2816 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
RT;
// The expression is vectorizable if MT is not diagonal, both operands are vectorizable,
// MET equals both VET and ST, and addition and multiplication intrinsics exist for MET.
2821 enum { vectorizable = !IsDiagonal<MT>::value &&
2822 MT::vectorizable && VT::vectorizable &&
2823 IsSame<MET,VET>::value &&
2824 IsSame<MET,ST>::value &&
2825 IntrinsicTrait<MET>::addition &&
2826 IntrinsicTrait<MET>::multiplication };
// The expression is SMP assignable only if neither operand needs evaluation and both
// operand types are SMP assignable themselves.
2829 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2830 !evaluateVector && VT::smpAssignable };
// Explicit constructor taking the matrix/vector multiplication expression and the
// scalar factor. NOTE(review): the initializer list is not visible in this excerpt.
2839 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
// Subscript operator: returns element 'index' of the wrapped expression multiplied by
// the stored scalar.
2851 inline ReturnType
operator[](
size_t index )
const {
2853 return vector_[index] * scalar_;
// Returns the size of the wrapped matrix/vector multiplication expression.
2862 inline size_t size()
const {
2863 return vector_.size();
// Forwards the alias query for the given address to the wrapped expression
// (vector_.canAlias) and returns its answer.
2893 template<
typename T >
2894 inline bool canAlias(
const T* alias )
const {
2895 return vector_.canAlias( alias );
// Forwards the aliasing query for the given address to the wrapped expression
// (vector_.isAliased) and returns its answer.
2905 template<
typename T >
2906 inline bool isAliased(
const T* alias )
const {
2907 return vector_.isAliased( alias );
// NOTE(review): the signatures of the member functions these statements belong to are
// not part of this excerpt.

// Returns the alignment answer of the wrapped expression.
2917 return vector_.isAligned();
// Binds the matrix operand of the wrapped expression to a local named A.
2927 typename MVM::LeftOperand A( vector_.leftOperand() );
// One operand of a larger condition: MT is a computation that is not evaluated up front.
2929 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Data members: the wrapped multiplication expression and the scalar factor.
2937 LeftOperand vector_;
2938 RightOperand scalar_;
// Assignment of the scaled matrix/vector multiplication to a dense (non-transposed)
// vector. Fetches both operands of the wrapped expression, treats a matrix without rows
// and a matrix without columns as separate special cases, and otherwise dispatches to
// selectAssignKernel with the stored scalar.
// NOTE(review): the bodies of the special cases and the definitions of A and x are on
// lines that are not part of this excerpt.
2953 template<
typename VT1 >
2954 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2960 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2961 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2963 if( left.rows() == 0UL ) {
2966 else if( left.columns() == 0UL ) {
2979 DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatch for the assignment: chooses between selectSmallAssignKernel and
// selectBlasAssignKernel. Diagonal matrices and non-evaluated computations are among the
// conditions for the small kernel.
// NOTE(review): the last operand of the condition (original line 3002) and the 'else'
// line are not visible in this excerpt.
2994 template<
typename VT1
2998 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3000 if( ( IsDiagonal<MT1>::value ) ||
3001 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3003 selectSmallAssignKernel( y, A, x, scalar );
3005 selectBlasAssignKernel( y, A, x, scalar );
// Default (scalar, non-vectorized) assignment kernel for y = A * x * scalar.
// The matrix is traversed column by column; the compile-time structure traits
// (IsLower/IsUpper/IsStrictly*/IsDiagonal) restrict the row range per column.
// NOTE(review): several lines of the body (e.g. the bodies of the strictly-lower and
// strictly-upper branches, the else branches of the index computations and the final
// scaling statement) are not part of this excerpt.
3023 template<
typename VT1
3027 static inline void selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3029 const size_t M( A.rows() );
3030 const size_t N( A.columns() );
3032 if( IsStrictlyLower<MT1>::value ) {
// For non-upper matrices y is initialized from the first column (row 0 is skipped for
// strictly lower matrices).
3036 if( !IsUpper<MT1>::value )
3038 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
3039 y[i] = A(i,0UL) * x[0UL];
// Column loop: starts at column 0 only for upper, not strictly upper matrices;
// otherwise column 0 has been handled above (or does not contribute) and it starts at 1.
3043 for(
size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
// Diagonal matrices: one multiplication per element, scaled immediately.
3045 if( IsDiagonal<MT1>::value )
3047 y[j] = A(j,j) * x[j] * scalar;
// General case: row range [ibegin,iend) of column j as far as the structure allows.
3051 const size_t ibegin( ( IsLower<MT1>::value )
3052 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3054 const size_t iend( ( IsUpper<MT1>::value )
3055 ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
// ipos rounds the number of rows down to an even count (mask size_t(-2)) so that the
// accumulation loop can handle two rows per iteration.
3059 const size_t inum( iend - ibegin );
3060 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
3062 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
3063 y[i ] += A(i ,j) * x[j];
3064 y[i+1UL] += A(i+1UL,j) * x[j];
// Remainder row (the guarding condition is on a line not visible here).
3067 y[ipos] += A(ipos,j) * x[j];
// For upper matrices element 'iend' is assigned (not accumulated) by column j.
3069 if( IsUpper<MT1>::value ) {
3070 y[iend] = A(iend,j) * x[j];
3075 if( IsStrictlyUpper<MT1>::value ) {
// Final pass over y for non-diagonal matrices; the index range skips the first element
// of strictly lower and the last element of strictly upper matrices.
// NOTE(review): the loop body (presumably the scaling by 'scalar') is not visible.
3079 if( !IsDiagonal<MT1>::value )
3081 const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
3082 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
// Small-size assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); simply forwards to
// selectDefaultAssignKernel.
3103 template<
typename VT1
3107 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3108 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3110 selectDefaultAssignKernel( y, A, x, scalar );
// Small-size assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). Computes y = A * x * scalar with
// intrinsic vectors: per section, up to eight accumulators cover consecutive
// IT::size-wide row chunks starting at row i, the columns j are traversed, and the
// accumulators are scaled by 'factor' and stored to y.
// NOTE(review): the loops over i, the accumulator initializations and the else branches
// of the jbegin/jend expressions are on lines that are not part of this excerpt.
3128 template<
typename VT1
3132 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3133 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3135 typedef IntrinsicTrait<ElementType> IT;
3137 const size_t M( A.rows() );
3138 const size_t N( A.columns() );
// The scalar is broadcast into an intrinsic vector once.
3140 const IntrinsicType factor(
set( scalar ) );
// Section with eight accumulators (rows i .. i+IT::size*8). For upper matrices the
// column range starts at i (i+1 if strictly upper); for lower matrices it ends at
// min(i+IT::size*8,N), reduced by one if strictly lower.
3146 const size_t jbegin( ( IsUpper<MT1>::value )
3147 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3149 const size_t jend( ( IsLower<MT1>::value )
3150 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3154 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3156 for(
size_t j=jbegin; j<jend; ++j ) {
3157 const IntrinsicType x1(
set( x[j] ) );
3158 xmm1 = xmm1 + A.load(i ,j) * x1;
3159 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3160 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3161 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3162 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3163 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3164 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3165 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3168 y.store( i , xmm1*factor );
3169 y.store( i+
IT::size , xmm2*factor );
3170 y.store( i+
IT::size*2UL, xmm3*factor );
3171 y.store( i+
IT::size*3UL, xmm4*factor );
3172 y.store( i+
IT::size*4UL, xmm5*factor );
3173 y.store( i+
IT::size*5UL, xmm6*factor );
3174 y.store( i+
IT::size*6UL, xmm7*factor );
3175 y.store( i+
IT::size*7UL, xmm8*factor );
// Section with four accumulators (rows i .. i+IT::size*4).
3180 const size_t jbegin( ( IsUpper<MT1>::value )
3181 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3183 const size_t jend( ( IsLower<MT1>::value )
3184 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3188 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3190 for(
size_t j=jbegin; j<jend; ++j ) {
3191 const IntrinsicType x1(
set( x[j] ) );
3192 xmm1 = xmm1 + A.load(i ,j) * x1;
3193 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3194 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3195 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3198 y.store( i , xmm1*factor );
3199 y.store( i+
IT::size , xmm2*factor );
3200 y.store( i+
IT::size*2UL, xmm3*factor );
3201 y.store( i+
IT::size*3UL, xmm4*factor );
// Section with three accumulators (rows i .. i+IT::size*3).
3206 const size_t jbegin( ( IsUpper<MT1>::value )
3207 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3209 const size_t jend( ( IsLower<MT1>::value )
3210 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3214 IntrinsicType xmm1, xmm2, xmm3;
3216 for(
size_t j=jbegin; j<jend; ++j ) {
3217 const IntrinsicType x1(
set( x[j] ) );
3218 xmm1 = xmm1 + A.load(i ,j) * x1;
3219 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3220 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3223 y.store( i , xmm1*factor );
3224 y.store( i+
IT::size , xmm2*factor );
3225 y.store( i+
IT::size*2UL, xmm3*factor );
// Section with two accumulators (rows i .. i+IT::size*2).
3230 const size_t jbegin( ( IsUpper<MT1>::value )
3231 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3233 const size_t jend( ( IsLower<MT1>::value )
3234 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3238 IntrinsicType xmm1, xmm2;
3240 for(
size_t j=jbegin; j<jend; ++j ) {
3241 const IntrinsicType x1(
set( x[j] ) );
3242 xmm1 = xmm1 + A.load(i ,j) * x1;
3243 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3246 y.store( i , xmm1*factor );
3247 y.store( i+
IT::size, xmm2*factor );
// Section with a single accumulator (rows i .. i+IT::size); the declaration of xmm1 is
// on a line not visible here.
3252 const size_t jbegin( ( IsUpper<MT1>::value )
3253 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3255 const size_t jend( ( IsLower<MT1>::value )
3256 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3262 for(
size_t j=jbegin; j<jend; ++j ) {
3263 const IntrinsicType x1(
set( x[j] ) );
3264 xmm1 = xmm1 + A.load(i,j) * x1;
3267 y.store( i, xmm1*factor );
// Large-size assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); simply forwards to
// selectDefaultAssignKernel.
3286 template<
typename VT1
3290 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3291 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3293 selectDefaultAssignKernel( y, A, x, scalar );
// Large-size assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). The matrix is processed in blocks of
// iblock rows and jblock columns; within a block the products of A and x are
// accumulated in intrinsic vectors, scaled by 'factor' and added to y.
// NOTE(review): since every visible store reads y first (y.load(i) + ...), y is
// presumably reset on a line not shown here -- confirm. The loops over i, the
// accumulator initializations and most of the stores are also not part of this excerpt.
3311 template<
typename VT1
3315 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3316 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3318 typedef IntrinsicTrait<ElementType> IT;
3320 const size_t M( A.rows() );
3321 const size_t N( A.columns() );
// Row block size: number of elements that fit into 32768 bytes. Column block size:
// 8 columns if N is smaller than iblock, 4 otherwise.
3323 const size_t iblock( 32768UL /
sizeof( ElementType ) );
3324 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3326 const IntrinsicType factor(
set( scalar ) );
3332 for(
size_t ii=0U; ii<M; ii+=iblock ) {
3333 for(
size_t jj=0UL; jj<N; jj+=jblock )
// Block limits: jend/itmp clamp the block to the matrix; for upper matrices the row
// range additionally ends at jend (jend-1 if strictly upper).
3335 const size_t jend(
min( jj+jblock, N ) );
3336 const size_t itmp(
min( ii+iblock, M ) );
3337 const size_t iend( ( IsUpper<MT1>::value )
3338 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
// For lower matrices the first row is max(ii, jj or jj+1 masked with
// size_t(-IT::size)); the mask presumably rounds down to a multiple of IT::size
// (holds if IT::size is a power of two -- confirm).
3341 size_t i( ( IsLower<MT1>::value )
3342 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
// Eight accumulators: rows i .. i+IT::size*8.
3347 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3349 for(
size_t j=jj; j<jend; ++j ) {
3350 const IntrinsicType x1(
set( x[j] ) );
3351 xmm1 = xmm1 + A.load(i ,j) * x1;
3352 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3353 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3354 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3355 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3356 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3357 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3358 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3361 y.store( i , y.load(i ) + xmm1*factor );
// Four accumulators: rows i .. i+IT::size*4.
3373 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3375 for(
size_t j=jj; j<jend; ++j ) {
3376 const IntrinsicType x1(
set( x[j] ) );
3377 xmm1 = xmm1 + A.load(i ,j) * x1;
3378 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3379 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3380 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3383 y.store( i , y.load(i ) + xmm1*factor );
// Three accumulators: rows i .. i+IT::size*3.
3391 IntrinsicType xmm1, xmm2, xmm3;
3393 for(
size_t j=jj; j<jend; ++j ) {
3394 const IntrinsicType x1(
set( x[j] ) );
3395 xmm1 = xmm1 + A.load(i ,j) * x1;
3396 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3397 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3400 y.store( i , y.load(i ) + xmm1*factor );
// Two accumulators: rows i .. i+IT::size*2.
3407 IntrinsicType xmm1, xmm2;
3409 for(
size_t j=jj; j<jend; ++j ) {
3410 const IntrinsicType x1(
set( x[j] ) );
3411 xmm1 = xmm1 + A.load(i ,j) * x1;
3412 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3415 y.store( i , y.load(i ) + xmm1*factor );
// Single accumulator: rows i .. i+IT::size (declaration of xmm1 not visible here).
3423 for(
size_t j=jj; j<jend; ++j ) {
3424 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3427 y.store( i, y.load(i) + xmm1*factor );
// BLAS assignment kernel, default overload: active when UseDefaultKernel holds, i.e. no
// BLAS routine applies; forwards to selectLargeAssignKernel.
3448 template<
typename VT1
3452 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3453 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3455 selectLargeAssignKernel( y, A, x, scalar );
// BLAS assignment kernel, single precision overload (enabled via
// UseSinglePrecisionKernel). Triangular matrices go through strmv with the uplo flag
// chosen from IsLower<MT1>; the other visible call is sgemv with 'scalar' and 0.0F.
// NOTE(review): presumably these are alpha/beta of y = alpha*A*x + beta*y, i.e.
// y = scalar*A*x -- confirm against the sgemv wrapper. The lines preparing y for strmv
// and the else line are not part of this excerpt.
3474 template<
typename VT1
3478 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3479 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3481 if( IsTriangular<MT1>::value ) {
3483 strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3486 sgemv( y, A, x, scalar, 0.0F );
// BLAS assignment kernel, double precision overload (enabled via
// UseDoublePrecisionKernel). Triangular matrices go through dtrmv with the uplo flag
// chosen from IsLower<MT1>; the other visible call is dgemv with 'scalar' and 0.0.
// NOTE(review): presumably these are alpha/beta of y = alpha*A*x + beta*y, i.e.
// y = scalar*A*x -- confirm against the dgemv wrapper. The lines preparing y for dtrmv
// and the else line are not part of this excerpt.
3507 template<
typename VT1
3511 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
3512 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3514 if( IsTriangular<MT1>::value ) {
3516 dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3519 dgemv( y, A, x, scalar, 0.0 );
// BLAS assignment kernel, single precision complex overload (enabled via
// UseSinglePrecisionComplexKernel). Triangular matrices go through ctrmv; the other
// visible call is cgemv with the scalars (scalar,0) and (0,0).
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y -- confirm against the
// cgemv wrapper. The lines preparing y for ctrmv and the else line are not visible.
3540 template<
typename VT1
3544 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3545 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3547 if( IsTriangular<MT1>::value ) {
3549 ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3552 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS assignment kernel, double precision complex overload (enabled via
// UseDoublePrecisionComplexKernel). Triangular matrices go through ztrmv; the other
// visible call is zgemv with the scalars (scalar,0) and (0,0).
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y -- confirm against the
// zgemv wrapper. The lines preparing y for ztrmv and the else line are not visible.
3573 template<
typename VT1
3577 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
3578 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3580 if( IsTriangular<MT1>::value ) {
3582 ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3585 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of the scaled matrix/vector multiplication to a sparse (non-transposed)
// vector: the expression is evaluated serially into a temporary of type ResultType.
// NOTE(review): the statement that transfers 'tmp' to 'lhs' is not part of this excerpt.
3603 template<
typename VT1 >
3604 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3614 const ResultType tmp(
serial( rhs ) );
// Addition assignment of the scaled matrix/vector multiplication to a dense
// (non-transposed) vector. Fetches both operands of the wrapped expression, treats a
// matrix with zero rows or zero columns as a special case, and otherwise dispatches to
// selectAddAssignKernel with the stored scalar.
// NOTE(review): the body of the special case and the definitions of A and x are on
// lines that are not part of this excerpt.
3631 template<
typename VT1 >
3632 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3638 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3639 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3641 if( left.rows() == 0UL || left.columns() == 0UL ) {
3653 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatch for the addition assignment: chooses between
// selectSmallAddAssignKernel and selectBlasAddAssignKernel. Diagonal matrices and
// non-evaluated computations are among the conditions for the small kernel.
// NOTE(review): the last operand of the condition and the 'else' line are not visible
// in this excerpt.
3668 template<
typename VT1
3672 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3674 if( ( IsDiagonal<MT1>::value ) ||
3675 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3677 selectSmallAddAssignKernel( y, A, x, scalar );
3679 selectBlasAddAssignKernel( y, A, x, scalar );
// Default addition assignment kernel: delegates to the target vector's own addAssign
// with the expression A * x * scalar.
3697 template<
typename VT1
3701 static inline void selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3703 y.addAssign( A * x * scalar );
// Small-size addition assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); forwards to
// selectDefaultAddAssignKernel.
3721 template<
typename VT1
3725 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3726 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3728 selectDefaultAddAssignKernel( y, A, x, scalar );
// Small-size addition assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). Per section, up to eight intrinsic
// accumulators cover consecutive IT::size-wide row chunks starting at row i; after the
// column loop the scaled accumulator is added to the current content of y.
// NOTE(review): the loops over i, the accumulator initializations, the else branches of
// the jbegin/jend expressions and all but the first store of each section are on lines
// that are not part of this excerpt.
3747 template<
typename VT1
3751 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3752 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3754 typedef IntrinsicTrait<ElementType> IT;
3756 const size_t M( A.rows() );
3757 const size_t N( A.columns() );
// The scalar is broadcast into an intrinsic vector once.
3759 const IntrinsicType factor(
set( scalar ) );
// Section with eight accumulators. For upper matrices the column range starts at i
// (i+1 if strictly upper); for lower matrices it ends at min(i+IT::size*8,N), reduced
// by one if strictly lower.
3765 const size_t jbegin( ( IsUpper<MT1>::value )
3766 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3768 const size_t jend( ( IsLower<MT1>::value )
3769 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3773 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3775 for(
size_t j=jbegin; j<jend; ++j ) {
3776 const IntrinsicType x1(
set( x[j] ) );
3777 xmm1 = xmm1 + A.load(i ,j) * x1;
3778 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3779 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3780 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3781 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3782 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3783 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3784 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3787 y.store( i , y.load(i ) + xmm1*factor );
// Section with four accumulators.
3799 const size_t jbegin( ( IsUpper<MT1>::value )
3800 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3802 const size_t jend( ( IsLower<MT1>::value )
3803 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3807 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3809 for(
size_t j=jbegin; j<jend; ++j ) {
3810 const IntrinsicType x1(
set( x[j] ) );
3811 xmm1 = xmm1 + A.load(i ,j) * x1;
3812 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3813 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3814 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3817 y.store( i , y.load(i ) + xmm1*factor );
// Section with three accumulators.
3825 const size_t jbegin( ( IsUpper<MT1>::value )
3826 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3828 const size_t jend( ( IsLower<MT1>::value )
3829 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3833 IntrinsicType xmm1, xmm2, xmm3;
3835 for(
size_t j=jbegin; j<jend; ++j ) {
3836 const IntrinsicType x1(
set( x[j] ) );
3837 xmm1 = xmm1 + A.load(i ,j) * x1;
3838 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3839 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3842 y.store( i , y.load(i ) + xmm1*factor );
// Section with two accumulators.
3849 const size_t jbegin( ( IsUpper<MT1>::value )
3850 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3852 const size_t jend( ( IsLower<MT1>::value )
3853 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3857 IntrinsicType xmm1, xmm2;
3859 for(
size_t j=jbegin; j<jend; ++j ) {
3860 const IntrinsicType x1(
set( x[j] ) );
3861 xmm1 = xmm1 + A.load(i ,j) * x1;
3862 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3865 y.store( i , y.load(i ) + xmm1*factor );
// Section with a single accumulator (declaration of xmm1 not visible here).
3871 const size_t jbegin( ( IsUpper<MT1>::value )
3872 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3874 const size_t jend( ( IsLower<MT1>::value )
3875 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3881 for(
size_t j=jbegin; j<jend; ++j ) {
3882 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3885 y.store( i, y.load(i) + xmm1*factor );
// Large-size addition assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); forwards to
// selectDefaultAddAssignKernel.
3904 template<
typename VT1
3908 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3909 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3911 selectDefaultAddAssignKernel( y, A, x, scalar );
// Large-size addition assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). The matrix is processed in blocks of
// iblock rows and jblock columns; within a block the products of A and x are
// accumulated in intrinsic vectors, scaled by 'factor' and added to y.
// NOTE(review): the loops over i, the accumulator initializations, the else branches of
// the index expressions and all but the first store of each section are on lines that
// are not part of this excerpt.
3930 template<
typename VT1
3934 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3935 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3937 typedef IntrinsicTrait<ElementType> IT;
3939 const size_t M( A.rows() );
3940 const size_t N( A.columns() );
// Row block size: number of elements that fit into 32768 bytes. Column block size:
// 8 columns if N is smaller than iblock, 4 otherwise.
3942 const size_t iblock( 32768UL /
sizeof( ElementType ) );
3943 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3945 const IntrinsicType factor(
set( scalar ) );
3949 for(
size_t ii=0U; ii<M; ii+=iblock ) {
3950 for(
size_t jj=0UL; jj<N; jj+=jblock )
// Block limits: jend/itmp clamp the block to the matrix; for upper matrices the row
// range additionally ends at jend (jend-1 if strictly upper).
3952 const size_t jend(
min( jj+jblock, N ) );
3953 const size_t itmp(
min( ii+iblock, M ) );
3954 const size_t iend( ( IsUpper<MT1>::value )
3955 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
// For lower matrices the first row is max(ii, jj or jj+1 masked with
// size_t(-IT::size)); the mask presumably rounds down to a multiple of IT::size
// (holds if IT::size is a power of two -- confirm).
3958 size_t i( ( IsLower<MT1>::value )
3959 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
// Eight accumulators: rows i .. i+IT::size*8.
3964 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3966 for(
size_t j=jj; j<jend; ++j ) {
3967 const IntrinsicType x1(
set( x[j] ) );
3968 xmm1 = xmm1 + A.load(i ,j) * x1;
3969 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3970 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3971 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3972 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3973 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3974 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3975 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3978 y.store( i , y.load(i ) + xmm1*factor );
// Four accumulators: rows i .. i+IT::size*4.
3990 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3992 for(
size_t j=jj; j<jend; ++j ) {
3993 const IntrinsicType x1(
set( x[j] ) );
3994 xmm1 = xmm1 + A.load(i ,j) * x1;
3995 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3996 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3997 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4000 y.store( i , y.load(i ) + xmm1*factor );
// Three accumulators: rows i .. i+IT::size*3.
4008 IntrinsicType xmm1, xmm2, xmm3;
4010 for(
size_t j=jj; j<jend; ++j ) {
4011 const IntrinsicType x1(
set( x[j] ) );
4012 xmm1 = xmm1 + A.load(i ,j) * x1;
4013 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4014 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4017 y.store( i , y.load(i ) + xmm1*factor );
// Two accumulators: rows i .. i+IT::size*2.
4024 IntrinsicType xmm1, xmm2;
4026 for(
size_t j=jj; j<jend; ++j ) {
4027 const IntrinsicType x1(
set( x[j] ) );
4028 xmm1 = xmm1 + A.load(i ,j) * x1;
4029 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
4032 y.store( i , y.load(i ) + xmm1*factor );
// Single accumulator: rows i .. i+IT::size (declaration of xmm1 not visible here).
4040 for(
size_t j=jj; j<jend; ++j ) {
4041 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
4044 y.store( i, y.load(i) + xmm1*factor );
// BLAS addition assignment kernel, default overload: active when UseDefaultKernel
// holds, i.e. no BLAS routine applies; forwards to selectLargeAddAssignKernel.
4065 template<
typename VT1
4069 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4070 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4072 selectLargeAddAssignKernel( y, A, x, scalar );
// BLAS addition assignment kernel, single precision overload (enabled via
// UseSinglePrecisionKernel). Triangular matrices go through strmv on a temporary 'tmp'
// (declared on a line not shown here); the other visible call is sgemv with 'scalar'
// and 1.0F.
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y, i.e.
// y += scalar*A*x -- confirm against the sgemv wrapper.
4091 template<
typename VT1
4095 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4096 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4098 if( IsTriangular<MT1>::value ) {
4100 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4104 sgemv( y, A, x, scalar, 1.0F );
// BLAS addition assignment kernel, double precision overload (enabled via
// UseDoublePrecisionKernel). Triangular matrices go through dtrmv on a temporary 'tmp'
// (declared on a line not shown here); the other visible call is dgemv with 'scalar'
// and 1.0.
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y, i.e.
// y += scalar*A*x -- confirm against the dgemv wrapper.
4125 template<
typename VT1
4129 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4130 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4132 if( IsTriangular<MT1>::value ) {
4134 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4138 dgemv( y, A, x, scalar, 1.0 );
// BLAS addition assignment kernel, single precision complex overload (enabled via
// UseSinglePrecisionComplexKernel). Triangular matrices go through ctrmv on a temporary
// 'tmp' (declared on a line not shown here); the other visible call is cgemv with the
// scalars (scalar,0) and (1,0).
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y -- confirm against the
// cgemv wrapper.
4159 template<
typename VT1
4163 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4164 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4166 if( IsTriangular<MT1>::value ) {
4168 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4172 cgemv( y, A, x, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS addition assignment kernel, double precision complex overload (enabled via
// UseDoublePrecisionComplexKernel). Triangular matrices go through ztrmv on a temporary
// 'tmp' (declared on a line not shown here); the other visible call is zgemv with the
// scalars (scalar,0) and (1,0).
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y -- confirm against the
// zgemv wrapper.
4193 template<
typename VT1
4197 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4198 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4200 if( IsTriangular<MT1>::value ) {
4202 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4206 zgemv( y, A, x, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the scaled matrix/vector multiplication to a dense
// (non-transposed) vector. Fetches both operands of the wrapped expression, treats a
// matrix with zero rows or zero columns as a special case, and otherwise dispatches to
// selectSubAssignKernel with the stored scalar.
// NOTE(review): the body of the special case and the definitions of A and x are on
// lines that are not part of this excerpt.
4228 template<
typename VT1 >
4229 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4235 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4236 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4238 if( left.rows() == 0UL || left.columns() == 0UL ) {
4250 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatch for the subtraction assignment: chooses between
// selectSmallSubAssignKernel and selectBlasSubAssignKernel. Diagonal matrices and
// non-evaluated computations are among the conditions for the small kernel.
// NOTE(review): the last operand of the condition and the 'else' line are not visible
// in this excerpt.
4265 template<
typename VT1
4269 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4271 if( ( IsDiagonal<MT1>::value ) ||
4272 ( IsComputation<MT>::value && !evaluateMatrix ) ||
4274 selectSmallSubAssignKernel( y, A, x, scalar );
4276 selectBlasSubAssignKernel( y, A, x, scalar );
// Default subtraction assignment kernel: delegates to the target vector's own subAssign
// with the expression A * x * scalar.
4294 template<
typename VT1
4298 static inline void selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4300 y.subAssign( A * x * scalar );
// Small-size subtraction assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); forwards to
// selectDefaultSubAssignKernel.
4318 template<
typename VT1
4322 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4323 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4325 selectDefaultSubAssignKernel( y, A, x, scalar );
// Small-size subtraction assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). Per section, up to eight intrinsic
// accumulators cover consecutive IT::size-wide row chunks starting at row i; after the
// column loop the scaled accumulator is subtracted from the current content of y.
// NOTE(review): the loops over i, the accumulator initializations, the else branches of
// the jbegin/jend expressions and all but the first store of each section are on lines
// that are not part of this excerpt.
4344 template<
typename VT1
4348 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4349 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4351 typedef IntrinsicTrait<ElementType> IT;
4353 const size_t M( A.rows() );
4354 const size_t N( A.columns() );
// The scalar is broadcast into an intrinsic vector once.
4356 const IntrinsicType factor(
set( scalar ) );
// Section with eight accumulators. For upper matrices the column range starts at i
// (i+1 if strictly upper); for lower matrices it ends at min(i+IT::size*8,N), reduced
// by one if strictly lower.
4362 const size_t jbegin( ( IsUpper<MT1>::value )
4363 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4365 const size_t jend( ( IsLower<MT1>::value )
4366 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4370 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4372 for(
size_t j=jbegin; j<jend; ++j ) {
4373 const IntrinsicType x1(
set( x[j] ) );
4374 xmm1 = xmm1 + A.load(i ,j) * x1;
4375 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4376 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4377 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4378 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
4379 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
4380 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
4381 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
4384 y.store( i , y.load(i ) - xmm1*factor );
// Section with four accumulators.
4396 const size_t jbegin( ( IsUpper<MT1>::value )
4397 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4399 const size_t jend( ( IsLower<MT1>::value )
4400 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4404 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4406 for(
size_t j=jbegin; j<jend; ++j ) {
4407 const IntrinsicType x1(
set( x[j] ) );
4408 xmm1 = xmm1 + A.load(i ,j) * x1;
4409 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4410 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4411 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4414 y.store( i , y.load(i ) - xmm1*factor );
// Section with three accumulators.
4422 const size_t jbegin( ( IsUpper<MT1>::value )
4423 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4425 const size_t jend( ( IsLower<MT1>::value )
4426 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4430 IntrinsicType xmm1, xmm2, xmm3;
4432 for(
size_t j=jbegin; j<jend; ++j ) {
4433 const IntrinsicType x1(
set( x[j] ) );
4434 xmm1 = xmm1 + A.load(i ,j) * x1;
4435 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4436 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4439 y.store( i , y.load(i ) - xmm1*factor );
// Section with two accumulators.
4446 const size_t jbegin( ( IsUpper<MT1>::value )
4447 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4449 const size_t jend( ( IsLower<MT1>::value )
4450 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4454 IntrinsicType xmm1, xmm2;
4456 for(
size_t j=jbegin; j<jend; ++j ) {
4457 const IntrinsicType x1(
set( x[j] ) );
4458 xmm1 = xmm1 + A.load(i ,j) * x1;
4459 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
4462 y.store( i , y.load(i ) - xmm1*factor );
// Section with a single accumulator (declaration of xmm1 not visible here).
4467 const size_t jbegin( ( IsUpper<MT1>::value )
4468 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4470 const size_t jend( ( IsLower<MT1>::value )
4471 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4477 for(
size_t j=jbegin; j<jend; ++j ) {
4478 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
4481 y.store( i, y.load(i) - xmm1*factor );
// Large-size subtraction assignment kernel, fallback overload: active when
// UseVectorizedDefaultKernel does NOT hold (DisableIf); forwards to
// selectDefaultSubAssignKernel.
4500 template<
typename VT1
4504 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4505 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4507 selectDefaultSubAssignKernel( y, A, x, scalar );
// Large-size subtraction assignment kernel, vectorized overload: active when
// UseVectorizedDefaultKernel holds (EnableIf). The matrix is processed in blocks of
// iblock rows and jblock columns; within a block the products of A and x are
// accumulated in intrinsic vectors, scaled by 'factor' and subtracted from y.
// NOTE(review): the loops over i, the accumulator initializations, the else branches of
// the index expressions and all but the first store of each section are on lines that
// are not part of this excerpt.
4526 template<
typename VT1
4530 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4531 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4533 typedef IntrinsicTrait<ElementType> IT;
4535 const size_t M( A.rows() );
4536 const size_t N( A.columns() );
// Row block size: number of elements that fit into 32768 bytes. Column block size:
// 8 columns if N is smaller than iblock, 4 otherwise.
4538 const size_t iblock( 32768UL /
sizeof( ElementType ) );
4539 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4541 const IntrinsicType factor(
set( scalar ) );
4545 for(
size_t ii=0U; ii<M; ii+=iblock ) {
4546 for(
size_t jj=0UL; jj<N; jj+=jblock )
// Block limits: jend/itmp clamp the block to the matrix; for upper matrices the row
// range additionally ends at jend (jend-1 if strictly upper).
4548 const size_t jend(
min( jj+jblock, N ) );
4549 const size_t itmp(
min( ii+iblock, M ) );
4550 const size_t iend( ( IsUpper<MT1>::value )
4551 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
// For lower matrices the first row is max(ii, jj or jj+1 masked with
// size_t(-IT::size)); the mask presumably rounds down to a multiple of IT::size
// (holds if IT::size is a power of two -- confirm).
4554 size_t i( ( IsLower<MT1>::value )
4555 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
// Eight accumulators: rows i .. i+IT::size*8.
4560 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4562 for(
size_t j=jj; j<jend; ++j ) {
4563 const IntrinsicType x1(
set( x[j] ) );
4564 xmm1 = xmm1 + A.load(i ,j) * x1;
4565 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4566 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4567 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4568 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
4569 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
4570 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
4571 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
4574 y.store( i , y.load(i ) - xmm1*factor );
// Four accumulators: rows i .. i+IT::size*4.
4586 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4588 for(
size_t j=jj; j<jend; ++j ) {
4589 const IntrinsicType x1(
set( x[j] ) );
4590 xmm1 = xmm1 + A.load(i ,j) * x1;
4591 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4592 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4593 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4596 y.store( i , y.load(i ) - xmm1*factor );
// Three accumulators: rows i .. i+IT::size*3.
4604 IntrinsicType xmm1, xmm2, xmm3;
4606 for(
size_t j=jj; j<jend; ++j ) {
4607 const IntrinsicType x1(
set( x[j] ) );
4608 xmm1 = xmm1 + A.load(i ,j) * x1;
4609 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4610 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4613 y.store( i , y.load(i ) - xmm1*factor );
// Two accumulators: rows i .. i+IT::size*2.
4620 IntrinsicType xmm1, xmm2;
4622 for(
size_t j=jj; j<jend; ++j ) {
4623 const IntrinsicType x1(
set( x[j] ) );
4624 xmm1 = xmm1 + A.load(i ,j) * x1;
4625 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
4628 y.store( i , y.load(i ) - xmm1*factor );
// Single accumulator: rows i .. i+IT::size (declaration of xmm1 not visible here).
4636 for(
size_t j=jj; j<jend; ++j ) {
4637 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
4640 y.store( i, y.load(i) - xmm1*factor );
// BLAS subtraction assignment kernel, default overload: active when UseDefaultKernel
// holds, i.e. no BLAS routine applies; forwards to selectLargeSubAssignKernel.
4661 template<
typename VT1
4665 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4666 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4668 selectLargeSubAssignKernel( y, A, x, scalar );
// BLAS subtraction assignment kernel, single precision overload (enabled via
// UseSinglePrecisionKernel). Triangular matrices go through strmv on a temporary 'tmp'
// (declared on a line not shown here); the other visible call is sgemv with '-scalar'
// and 1.0F.
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y, i.e.
// y -= scalar*A*x -- confirm against the sgemv wrapper.
4687 template<
typename VT1
4691 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4692 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4694 if( IsTriangular<MT1>::value ) {
4696 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4700 sgemv( y, A, x, -scalar, 1.0F );
// BLAS subtraction assignment kernel, double precision overload (enabled via
// UseDoublePrecisionKernel). Triangular matrices go through dtrmv on a temporary 'tmp'
// (declared on a line not shown here); the other visible call is dgemv with '-scalar'
// and 1.0.
// NOTE(review): presumably alpha/beta of y = alpha*A*x + beta*y, i.e.
// y -= scalar*A*x -- confirm against the dgemv wrapper.
4721 template<
typename VT1
4725 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,MT1,VT2,ST2> >::Type
4726 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4728 if( IsTriangular<MT1>::value ) {
4730 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4734 dgemv( y, A, x, -scalar, 1.0 );
4755 template<
typename VT1
4759 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4760 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4762 if( IsTriangular<MT1>::value ) {
4764 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4768 cgemv( y, A, x, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4789 template<
typename VT1
4793 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,MT1,VT2> >::Type
4794 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4796 if( IsTriangular<MT1>::value ) {
4798 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4802 zgemv( y, A, x, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4824 template<
typename VT1 >
4825 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4835 const ResultType tmp(
serial( rhs ) );
4858 template<
typename VT1 >
4859 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4860 smpAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4866 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4867 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4869 if( left.rows() == 0UL ) {
4872 else if( left.columns() == 0UL ) {
4903 template<
typename VT1 >
4904 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4905 smpAssign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4915 const ResultType tmp( rhs );
4934 template<
typename VT1 >
4935 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4936 smpAddAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4942 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4943 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4945 if( left.rows() == 0UL || left.columns() == 0UL ) {
4979 template<
typename VT1 >
4980 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4981 smpSubAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4987 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4988 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4990 if( left.rows() == 0UL || left.columns() == 0UL ) {
5025 template<
typename VT1 >
5026 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5027 smpMultAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
5037 const ResultType tmp( rhs );
5100 template<
typename T1
5102 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
5108 throw std::invalid_argument(
"Matrix and vector sizes do not match" );
5125 template<
typename MT,
typename VT >
5143 template<
typename MT,
typename VT,
bool AF >
5148 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type
5149 ,
typename SubvectorExprTrait<const VT,AF>::Type >::Type Type;
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:8247
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: TransposeFlag.h:159
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:272
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:121
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:276
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:126
Constraint on the data type.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:119
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:290
Compile time check for low-level access to mutable data. This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:376
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:410
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
Header file for the IsMatMatMultExpr type trait class.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:281
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
Constraints on the storage order of matrix types.
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:124
Header file for the serial shim.
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:312
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:326
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:386
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:275
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:122
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
const size_t TDMATDVECMULT_THRESHOLD
Column-major dense matrix/dense vector multiplication threshold. This setting specifies the threshold ...
Definition: Thresholds.h:74
Base template for the MultTrait class.
Definition: MultTrait.h:150
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:398
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:120
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:287
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:284
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:274
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:366
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:273
const size_t SMP_TDMATDVECMULT_THRESHOLD
SMP column-major dense matrix/dense vector multiplication threshold. This threshold specifies when a c...
Definition: Thresholds.h:345
Header file for the HasMutableDataAccess type trait.
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:123
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Header file for all intrinsic functionality.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:430
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:332
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the IsUpper type trait.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:440
Header file for the MatVecMultExpr base class.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:441
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:278
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:277
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:420
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849