35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
// Head of the TDMatDVecMultExpr expression class. It derives publicly from
// DenseVector, passing its own type as the first template argument, and
// privately from MatVecMultExpr and Computation.
// NOTE(review): the declaration of the VT template parameter is not visible in
// this excerpt — confirm the full parameter list against the complete header.
117 template<
typename MT
119 class TDMatDVecMultExpr :
public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
120 ,
private MatVecMultExpr
121 ,
private Computation
// Compile-time switch used further down as an EnableIf condition on friend
// function templates. `value` is set whenever at least one of the two operands
// is flagged for evaluation (evaluateMatrix / evaluateVector); the template
// parameter T1 does not take part in the visible expression.
150 template<
typename T1 >
151 struct UseSMPAssign {
152 enum { value = ( evaluateMatrix || evaluateVector ) };
162 template<
typename T1,
typename T2,
typename T3 >
163 struct UseBlasKernel {
169 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
185 template<
typename T1,
typename T2,
typename T3 >
186 struct UseVectorizedDefaultKernel {
189 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
224 MT::vectorizable && VT::vectorizable &&
// Compilation flag for SMP assignment: set only if neither operand is flagged
// for evaluation and both operand types report smpAssignable themselves.
230 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
231 !evaluateVector && VT::smpAssignable };
259 mat_.columns() == 0UL )
263 return mat_(index,index) *
vec_[index];
270 :(
mat_.columns() ) );
273 const size_t jnum( jend - jbegin );
274 const size_t jpos( jbegin + ( ( jnum - 1UL ) &
size_t(-2) ) + 1UL );
276 ElementType res(
mat_(index,jbegin) *
vec_[jbegin] );
278 for(
size_t j=jbegin+1UL; j<jpos; j+=2UL ) {
282 res +=
mat_(index,jpos) *
vec_[jpos];
// Checked element access.
// The index is compared against the number of rows of the matrix operand; the
// statement executed for an out-of-range index is not visible in this excerpt
// (NOTE(review): presumably it reports the invalid index — confirm).
// Otherwise the call is forwarded to operator[] of this expression.
296 inline ReturnType
at(
size_t index )
const {
297 if( index >=
mat_.rows() ) {
300 return (*
this)[index];
340 template<
typename T >
342 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
352 template<
typename T >
354 return (
mat_.isAliased( alias ) ||
vec_.isAliased( alias ) );
364 return mat_.isAligned() &&
vec_.isAligned();
376 (
mat_.rows() *
mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
377 (
size() > SMP_TDMATDVECMULT_THRESHOLD );
400 template<
typename VT1 >
407 if( rhs.mat_.rows() == 0UL ) {
410 else if( rhs.mat_.columns() == 0UL ) {
415 LT A(
serial( rhs.mat_ ) );
416 RT x(
serial( rhs.vec_ ) );
423 TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
// Kernel dispatch for the plain assignment y = A * x.
// Only the last term of the selection condition is visible here: it tests
// whether the matrix has fewer than TDMATDVECMULT_THRESHOLD elements
// (rows * columns). The two visible calls forward to the small-kernel and the
// BLAS-kernel selection, respectively.
// NOTE(review): the leading terms of the condition and the `else` keyword are
// missing from this excerpt — compare with selectAddAssignKernel.
439 template<
typename VT1
442 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
446 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
447 selectSmallAssignKernel( y, A, x );
449 selectBlasAssignKernel( y, A, x );
// Default (scalar) assignment kernel: fills y with the products A(i,j) * x[j],
// visiting A one column j at a time and exploiting the compile-time structure
// traits of MT1 (lower/upper/strict/diagonal) to shrink the row range.
// NOTE(review): several lines (branch bodies, closing braces, the fallback
// arms of the conditionals) are not visible in this excerpt.
468 template<
typename VT1
471 static inline void selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
473 const size_t M( A.rows() );
474 const size_t N( A.columns() );
// Special case for strictly lower matrices; the body is not visible here.
476 if( IsStrictlyLower<MT1>::value ) {
// Unless A is upper, y is initialised from column 0 (plain assignment, not
// accumulation). Row 0 is skipped for strictly lower matrices.
480 if( !IsUpper<MT1>::value )
482 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
483 y[i] = A(i,0UL) * x[0UL];
// Column loop: starts at column 0 for upper matrices that are not strictly
// upper, and at column 1 in every other case.
487 for(
size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
// Diagonal matrices contribute exactly one product per column.
489 if( IsDiagonal<MT1>::value )
491 y[j] = A(j,j) * x[j];
// Row range [ibegin,iend) for column j, narrowed for lower/upper matrices.
495 const size_t ibegin( ( IsLower<MT1>::value )
496 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
498 const size_t iend( ( IsUpper<MT1>::value )
499 ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
// ipos is ibegin plus the row count rounded down to an even number, so the
// loop below can process two rows per iteration.
503 const size_t inum( iend - ibegin );
504 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
506 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
507 y[i ] += A(i ,j) * x[j];
508 y[i+1UL] += A(i+1UL,j) * x[j];
// Update of the row at ipos; the guard around this statement is not visible.
511 y[ipos] += A(ipos,j) * x[j];
// For upper matrices the element at index iend is assigned, not accumulated.
513 if( IsUpper<MT1>::value ) {
514 y[iend] = A(iend,j) * x[j];
// Special case for strictly upper matrices; the body is not visible here.
519 if( IsStrictlyUpper<MT1>::value ) {
// Small-matrix assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default assignment kernel.
// NOTE(review): presumably DisableIf removes this overload when the vectorized
// kernel applies — confirm against the DisableIf definition.
540 template<
typename VT1
543 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
544 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
546 selectDefaultAssignKernel( y, A, x );
565 template<
typename VT1
568 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
569 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
571 typedef IntrinsicTrait<ElementType> IT;
573 const size_t M( A.rows() );
574 const size_t N( A.columns() );
576 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
578 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
585 const size_t jbegin( ( IsUpper<MT1>::value )
586 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
588 const size_t jend( ( IsLower<MT1>::value )
589 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
593 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
595 for(
size_t j=jbegin; j<jend; ++j ) {
596 const IntrinsicType x1(
set( x[j] ) );
597 xmm1 = xmm1 + A.load(i ,j) * x1;
598 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
599 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
600 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
601 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
602 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
603 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
604 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
619 const size_t jbegin( ( IsUpper<MT1>::value )
620 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
622 const size_t jend( ( IsLower<MT1>::value )
623 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
627 IntrinsicType xmm1, xmm2, xmm3, xmm4;
629 for(
size_t j=jbegin; j<jend; ++j ) {
630 const IntrinsicType x1(
set( x[j] ) );
631 xmm1 = xmm1 + A.load(i ,j) * x1;
632 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
633 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
634 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
645 const size_t jbegin( ( IsUpper<MT1>::value )
646 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
648 const size_t jend( ( IsLower<MT1>::value )
649 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
653 IntrinsicType xmm1, xmm2, xmm3;
655 for(
size_t j=jbegin; j<jend; ++j ) {
656 const IntrinsicType x1(
set( x[j] ) );
657 xmm1 = xmm1 + A.load(i ,j) * x1;
658 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
659 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
669 const size_t jbegin( ( IsUpper<MT1>::value )
670 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
672 const size_t jend( ( IsLower<MT1>::value )
673 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
677 IntrinsicType xmm1, xmm2;
679 for(
size_t j=jbegin; j<jend; ++j ) {
680 const IntrinsicType x1(
set( x[j] ) );
681 xmm1 = xmm1 + A.load(i ,j) * x1;
682 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
691 const size_t jbegin( ( IsUpper<MT1>::value )
692 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
694 const size_t jend( ( IsLower<MT1>::value )
695 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
701 for(
size_t j=jbegin; j<jend; ++j ) {
702 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
708 for( ; remainder && i<M; ++i )
710 const size_t jbegin( ( IsUpper<MT1>::value )
711 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
713 const size_t jend( ( IsLower<MT1>::value )
714 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
720 for(
size_t j=jbegin; j<jend; ++j ) {
721 value += A(i,j) * x[j];
// Large-matrix assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default assignment kernel.
744 template<
typename VT1
747 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
748 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
750 selectDefaultAssignKernel( y, A, x );
769 template<
typename VT1
772 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
773 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
775 typedef IntrinsicTrait<ElementType> IT;
777 const size_t M( A.rows() );
778 const size_t N( A.columns() );
780 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
782 const size_t iblock( 32768UL /
sizeof( ElementType ) );
783 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
789 for(
size_t ii=0U; ii<M; ii+=iblock ) {
790 for(
size_t jj=0UL; jj<N; jj+=jblock )
792 const size_t jend(
min( jj+jblock, N ) );
793 const size_t itmp(
min( ii+iblock, M ) );
794 const size_t iend( ( IsUpper<MT1>::value )
795 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
798 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
801 size_t i( ( IsLower<MT1>::value )
802 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
807 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
809 for(
size_t j=jj; j<jend; ++j ) {
810 const IntrinsicType x1(
set( x[j] ) );
811 xmm1 = xmm1 + A.load(i ,j) * x1;
812 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
813 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
814 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
815 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
816 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
817 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
818 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
821 y.store( i , y.load(i ) + xmm1 );
833 IntrinsicType xmm1, xmm2, xmm3, xmm4;
835 for(
size_t j=jj; j<jend; ++j ) {
836 const IntrinsicType x1(
set( x[j] ) );
837 xmm1 = xmm1 + A.load(i ,j) * x1;
838 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
839 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
840 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
843 y.store( i , y.load(i ) + xmm1 );
851 IntrinsicType xmm1, xmm2, xmm3;
853 for(
size_t j=jj; j<jend; ++j ) {
854 const IntrinsicType x1(
set( x[j] ) );
855 xmm1 = xmm1 + A.load(i ,j) * x1;
856 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
857 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
860 y.store( i , y.load(i ) + xmm1 );
867 IntrinsicType xmm1, xmm2;
869 for(
size_t j=jj; j<jend; ++j ) {
870 const IntrinsicType x1(
set( x[j] ) );
871 xmm1 = xmm1 + A.load(i ,j) * x1;
872 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
875 y.store( i , y.load(i ) + xmm1 );
883 for(
size_t j=jj; j<jend; ++j ) {
884 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
887 y.store( i, y.load(i) + xmm1 );
890 for( ; remainder && i<iend; ++i )
894 for(
size_t j=jj; j<jend; ++j ) {
895 value += A(i,j) * x[j];
// BLAS assignment kernel, overload guarded by
// DisableIf< UseBlasKernel<VT1,MT1,VT2> >: no BLAS call is made here, the work
// is handed to the large assignment kernel.
920 template<
typename VT1
923 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
924 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
926 selectLargeAssignKernel( y, A, x );
// BLAS assignment kernel, overload guarded by
// EnableIf< UseBlasKernel<VT1,MT1,VT2> >.
// Triangular matrices go through trmv, with CblasLower or CblasUpper selected
// from IsLower<MT1>; whatever precedes the trmv call inside that branch is not
// visible in this excerpt. The other visible call is gemv with the scalar
// arguments ET(1) and ET(0).
// NOTE(review): presumably these are the BLAS alpha/beta factors, i.e.
// y = 1*A*x + 0*y — confirm against the gemv wrapper's signature.
946 template<
typename VT1
949 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
950 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
954 if( IsTriangular<MT1>::value ) {
956 trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
959 gemv( y, A, x, ET(1), ET(0) );
// Assignment of the product expression to a sparse vector.
// The expression is first evaluated into a temporary of type ResultType,
// wrapped in serial(); the use of the temporary is not visible in this excerpt.
979 template<
typename VT1 >
980 friend inline void assign( SparseVector<VT1,false>& lhs,
const TDMatDVecMultExpr& rhs )
990 const ResultType tmp(
serial( rhs ) );
// Addition assignment of the product expression to a dense vector.
1009 template<
typename VT1 >
1010 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const TDMatDVecMultExpr& rhs )
// Guard for a matrix operand without rows or without columns; the body of the
// branch is not visible in this excerpt.
1016 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Both operands are passed through serial() and bound to the operand types
// LT and RT before the kernel is chosen.
1020 LT A(
serial( rhs.mat_ ) );
1021 RT x(
serial( rhs.vec_ ) );
// Dispatch to the kernel selection with the left-hand side passed as ~lhs.
1028 TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
// Kernel dispatch for the addition assignment y += A * x.
// The small kernel is chosen if A is diagonal, if the matrix operand type MT
// is a computation that is not flagged for evaluation, or if A has fewer than
// TDMATDVECMULT_THRESHOLD elements (rows * columns). The second visible call
// forwards to the BLAS kernel selection (the `else` line is not visible here).
1044 template<
typename VT1
1047 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1049 if( ( IsDiagonal<MT1>::value ) ||
1050 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1051 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1052 selectSmallAddAssignKernel( y, A, x );
1054 selectBlasAddAssignKernel( y, A, x );
// Default (scalar) addition assignment kernel: adds A(i,j) * x[j] to y[i],
// visiting A one column j at a time.
// NOTE(review): the fallback arms of the two conditionals, the guard around
// the trailing update and the closing braces are not visible in this excerpt.
1073 template<
typename VT1
1076 static inline void selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1078 const size_t M( A.rows() );
1079 const size_t N( A.columns() );
1081 for(
size_t j=0UL; j<N; ++j )
// Diagonal matrices contribute exactly one product per column.
1083 if( IsDiagonal<MT1>::value )
1085 y[j] += A(j,j) * x[j];
// Row range [ibegin,iend) for column j, narrowed for lower/upper matrices.
1089 const size_t ibegin( ( IsLower<MT1>::value )
1090 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1092 const size_t iend( ( IsUpper<MT1>::value )
1093 ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
// ipos is ibegin plus the row count rounded down to an even number, so the
// loop below can process two rows per iteration.
1097 const size_t inum( iend - ibegin );
1098 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
1100 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
1101 y[i ] += A(i ,j) * x[j];
1102 y[i+1UL] += A(i+1UL,j) * x[j];
// Update of the row at ipos; the guard around this statement is not visible.
1105 y[ipos] += A(ipos,j) * x[j];
// Small-matrix addition assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default addition assignment kernel.
1127 template<
typename VT1
1130 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1131 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1133 selectDefaultAddAssignKernel( y, A, x );
1152 template<
typename VT1
1155 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1156 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1158 typedef IntrinsicTrait<ElementType> IT;
1160 const size_t M( A.rows() );
1161 const size_t N( A.columns() );
1163 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1165 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
1172 const size_t jbegin( ( IsUpper<MT1>::value )
1173 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1175 const size_t jend( ( IsLower<MT1>::value )
1176 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1180 IntrinsicType xmm1( y.load(i ) );
1181 IntrinsicType xmm2( y.load(i+
IT::size ) );
1182 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1183 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1184 IntrinsicType xmm5( y.load(i+
IT::size*4UL) );
1185 IntrinsicType xmm6( y.load(i+
IT::size*5UL) );
1186 IntrinsicType xmm7( y.load(i+
IT::size*6UL) );
1187 IntrinsicType xmm8( y.load(i+
IT::size*7UL) );
1189 for(
size_t j=jbegin; j<jend; ++j ) {
1190 const IntrinsicType x1(
set( x[j] ) );
1191 xmm1 = xmm1 + A.load(i ,j) * x1;
1192 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1193 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1194 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1195 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1196 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1197 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1198 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
1201 y.store( i , xmm1 );
1213 const size_t jbegin( ( IsUpper<MT1>::value )
1214 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1216 const size_t jend( ( IsLower<MT1>::value )
1217 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1221 IntrinsicType xmm1( y.load(i ) );
1222 IntrinsicType xmm2( y.load(i+
IT::size ) );
1223 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1224 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1226 for(
size_t j=jbegin; j<jend; ++j ) {
1227 const IntrinsicType x1(
set( x[j] ) );
1228 xmm1 = xmm1 + A.load(i ,j) * x1;
1229 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1230 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1231 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1234 y.store( i , xmm1 );
1242 const size_t jbegin( ( IsUpper<MT1>::value )
1243 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1245 const size_t jend( ( IsLower<MT1>::value )
1246 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1250 IntrinsicType xmm1( y.load(i ) );
1251 IntrinsicType xmm2( y.load(i+
IT::size ) );
1252 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1254 for(
size_t j=jbegin; j<jend; ++j ) {
1255 const IntrinsicType x1(
set( x[j] ) );
1256 xmm1 = xmm1 + A.load(i ,j) * x1;
1257 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1258 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1261 y.store( i , xmm1 );
1268 const size_t jbegin( ( IsUpper<MT1>::value )
1269 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1271 const size_t jend( ( IsLower<MT1>::value )
1272 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1276 IntrinsicType xmm1( y.load(i ) );
1277 IntrinsicType xmm2( y.load(i+
IT::size) );
1279 for(
size_t j=jbegin; j<jend; ++j ) {
1280 const IntrinsicType x1(
set( x[j] ) );
1281 xmm1 = xmm1 + A.load(i ,j) * x1;
1282 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
1285 y.store( i , xmm1 );
1291 const size_t jbegin( ( IsUpper<MT1>::value )
1292 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1294 const size_t jend( ( IsLower<MT1>::value )
1295 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1299 IntrinsicType xmm1( y.load(i) );
1301 for(
size_t j=jbegin; j<jend; ++j ) {
1302 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
1308 for( ; remainder && i<M; ++i )
1310 const size_t jbegin( ( IsUpper<MT1>::value )
1311 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1313 const size_t jend( ( IsLower<MT1>::value )
1314 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1320 for(
size_t j=jbegin; j<jend; ++j ) {
1321 value += A(i,j) * x[j];
// Large-matrix addition assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default addition assignment kernel.
1344 template<
typename VT1
1347 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1348 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1350 selectDefaultAddAssignKernel( y, A, x );
1369 template<
typename VT1
1372 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1373 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1375 typedef IntrinsicTrait<ElementType> IT;
1377 const size_t M( A.rows() );
1378 const size_t N( A.columns() );
1380 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1382 const size_t iblock( 32768UL /
sizeof( ElementType ) );
1383 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1387 for(
size_t ii=0U; ii<M; ii+=iblock ) {
1388 for(
size_t jj=0UL; jj<N; jj+=jblock )
1390 const size_t jend(
min( jj+jblock, N ) );
1391 const size_t itmp(
min( ii+iblock, M ) );
1392 const size_t iend( ( IsUpper<MT1>::value )
1393 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1396 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1399 size_t i( ( IsLower<MT1>::value )
1400 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
1405 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1407 for(
size_t j=jj; j<jend; ++j ) {
1408 const IntrinsicType x1(
set( x[j] ) );
1409 xmm1 = xmm1 + A.load(i ,j) * x1;
1410 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1411 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1412 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1413 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1414 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1415 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1416 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
1419 y.store( i , y.load(i ) + xmm1 );
1431 IntrinsicType xmm1, xmm2, xmm3, xmm4;
1433 for(
size_t j=jj; j<jend; ++j ) {
1434 const IntrinsicType x1(
set( x[j] ) );
1435 xmm1 = xmm1 + A.load(i ,j) * x1;
1436 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1437 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1438 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1441 y.store( i , y.load(i ) + xmm1 );
1449 IntrinsicType xmm1, xmm2, xmm3;
1451 for(
size_t j=jj; j<jend; ++j ) {
1452 const IntrinsicType x1(
set( x[j] ) );
1453 xmm1 = xmm1 + A.load(i ,j) * x1;
1454 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1455 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1458 y.store( i , y.load(i ) + xmm1 );
1465 IntrinsicType xmm1, xmm2;
1467 for(
size_t j=jj; j<jend; ++j ) {
1468 const IntrinsicType x1(
set( x[j] ) );
1469 xmm1 = xmm1 + A.load(i ,j) * x1;
1470 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
1473 y.store( i , y.load(i ) + xmm1 );
1481 for(
size_t j=jj; j<jend; ++j ) {
1482 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
1485 y.store( i, y.load(i) + xmm1 );
1488 for( ; remainder && i<iend; ++i )
1492 for(
size_t j=jj; j<jend; ++j ) {
1493 value += A(i,j) * x[j];
// BLAS addition assignment kernel, overload guarded by
// DisableIf< UseBlasKernel<VT1,MT1,VT2> >: no BLAS call is made here, the work
// is handed to the large addition assignment kernel.
1518 template<
typename VT1
1521 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
1522 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1524 selectLargeAddAssignKernel( y, A, x );
// BLAS addition assignment kernel, overload guarded by
// EnableIf< UseBlasKernel<VT1,MT1,VT2> >.
// Triangular matrices: trmv is applied to a separate vector `tmp` (its
// declaration is not visible in this excerpt), which is then added to y via
// addAssign. The other visible call is gemv with the scalar arguments ET(1)
// and ET(1).
// NOTE(review): presumably alpha = 1 and beta = 1, i.e. y = A*x + y — confirm
// against the gemv wrapper's signature.
1544 template<
typename VT1
1547 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
1548 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1552 if( IsTriangular<MT1>::value ) {
1554 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1555 addAssign( y, tmp );
1558 gemv( y, A, x, ET(1), ET(1) );
// Subtraction assignment of the product expression to a dense vector.
1582 template<
typename VT1 >
1583 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const TDMatDVecMultExpr& rhs )
// Guard for a matrix operand without rows or without columns; the body of the
// branch is not visible in this excerpt.
1589 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Both operands are passed through serial() and bound to the operand types
// LT and RT before the kernel is chosen.
1593 LT A(
serial( rhs.mat_ ) );
1594 RT x(
serial( rhs.vec_ ) );
// Dispatch to the kernel selection with the left-hand side passed as ~lhs.
1601 TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
// Kernel dispatch for the subtraction assignment y -= A * x.
// The small kernel is chosen if A is diagonal, if the matrix operand type MT
// is a computation that is not flagged for evaluation, or if A has fewer than
// TDMATDVECMULT_THRESHOLD elements (rows * columns). The second visible call
// forwards to the BLAS kernel selection (the `else` line is not visible here).
1617 template<
typename VT1
1620 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1622 if( ( IsDiagonal<MT1>::value ) ||
1623 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1624 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1625 selectSmallSubAssignKernel( y, A, x );
1627 selectBlasSubAssignKernel( y, A, x );
// Default (scalar) subtraction assignment kernel: subtracts A(i,j) * x[j] from
// y[i], visiting A one column j at a time.
// NOTE(review): the fallback arms of the two conditionals, the guard around
// the trailing update and the closing braces are not visible in this excerpt.
1646 template<
typename VT1
1649 static inline void selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1651 const size_t M( A.rows() );
1652 const size_t N( A.columns() );
1654 for(
size_t j=0UL; j<N; ++j )
// Diagonal matrices contribute exactly one product per column.
1656 if( IsDiagonal<MT1>::value )
1658 y[j] -= A(j,j) * x[j];
// Row range [ibegin,iend) for column j, narrowed for lower/upper matrices.
1662 const size_t ibegin( ( IsLower<MT1>::value )
1663 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1665 const size_t iend( ( IsUpper<MT1>::value )
1666 ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
// ipos is ibegin plus the row count rounded down to an even number, so the
// loop below can process two rows per iteration.
1670 const size_t inum( iend - ibegin );
1671 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
1673 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
1674 y[i ] -= A(i ,j) * x[j];
1675 y[i+1UL] -= A(i+1UL,j) * x[j];
// Update of the row at ipos; the guard around this statement is not visible.
1678 y[ipos] -= A(ipos,j) * x[j];
// Small-matrix subtraction assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default subtraction assignment kernel.
1700 template<
typename VT1
1703 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1704 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1706 selectDefaultSubAssignKernel( y, A, x );
1726 template<
typename VT1
1729 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1730 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1732 typedef IntrinsicTrait<ElementType> IT;
1734 const size_t M( A.rows() );
1735 const size_t N( A.columns() );
1737 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1739 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
1746 const size_t jbegin( ( IsUpper<MT1>::value )
1747 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1749 const size_t jend( ( IsLower<MT1>::value )
1750 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1754 IntrinsicType xmm1( y.load(i ) );
1755 IntrinsicType xmm2( y.load(i+
IT::size ) );
1756 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1757 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1758 IntrinsicType xmm5( y.load(i+
IT::size*4UL) );
1759 IntrinsicType xmm6( y.load(i+
IT::size*5UL) );
1760 IntrinsicType xmm7( y.load(i+
IT::size*6UL) );
1761 IntrinsicType xmm8( y.load(i+
IT::size*7UL) );
1763 for(
size_t j=jbegin; j<jend; ++j ) {
1764 const IntrinsicType x1(
set( x[j] ) );
1765 xmm1 = xmm1 - A.load(i ,j) * x1;
1766 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1767 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1768 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1769 xmm5 = xmm5 - A.load(i+
IT::size*4UL,j) * x1;
1770 xmm6 = xmm6 - A.load(i+
IT::size*5UL,j) * x1;
1771 xmm7 = xmm7 - A.load(i+
IT::size*6UL,j) * x1;
1772 xmm8 = xmm8 - A.load(i+
IT::size*7UL,j) * x1;
1775 y.store( i , xmm1 );
1787 const size_t jbegin( ( IsUpper<MT1>::value )
1788 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1790 const size_t jend( ( IsLower<MT1>::value )
1791 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1795 IntrinsicType xmm1( y.load(i ) );
1796 IntrinsicType xmm2( y.load(i+
IT::size ) );
1797 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1798 IntrinsicType xmm4( y.load(i+
IT::size*3UL) );
1800 for(
size_t j=jbegin; j<jend; ++j ) {
1801 const IntrinsicType x1(
set( x[j] ) );
1802 xmm1 = xmm1 - A.load(i ,j) * x1;
1803 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1804 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1805 xmm4 = xmm4 - A.load(i+
IT::size*3UL,j) * x1;
1808 y.store( i , xmm1 );
1816 const size_t jbegin( ( IsUpper<MT1>::value )
1817 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1819 const size_t jend( ( IsLower<MT1>::value )
1820 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1824 IntrinsicType xmm1( y.load(i ) );
1825 IntrinsicType xmm2( y.load(i+
IT::size ) );
1826 IntrinsicType xmm3( y.load(i+
IT::size*2UL) );
1828 for(
size_t j=jbegin; j<jend; ++j ) {
1829 const IntrinsicType x1(
set( x[j] ) );
1830 xmm1 = xmm1 - A.load(i ,j) * x1;
1831 xmm2 = xmm2 - A.load(i+
IT::size ,j) * x1;
1832 xmm3 = xmm3 - A.load(i+
IT::size*2UL,j) * x1;
1835 y.store( i , xmm1 );
1842 const size_t jbegin( ( IsUpper<MT1>::value )
1843 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1845 const size_t jend( ( IsLower<MT1>::value )
1846 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1850 IntrinsicType xmm1( y.load(i ) );
1851 IntrinsicType xmm2( y.load(i+
IT::size) );
1853 for(
size_t j=jbegin; j<jend; ++j ) {
1854 const IntrinsicType x1(
set( x[j] ) );
1855 xmm1 = xmm1 - A.load(i ,j) * x1;
1856 xmm2 = xmm2 - A.load(i+
IT::size,j) * x1;
1859 y.store( i , xmm1 );
1865 const size_t jbegin( ( IsUpper<MT1>::value )
1866 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1868 const size_t jend( ( IsLower<MT1>::value )
1869 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1873 IntrinsicType xmm1( y.load(i) );
1875 for(
size_t j=jbegin; j<jend; ++j ) {
1876 xmm1 = xmm1 - A.load(i,j) *
set( x[j] );
1882 for( ; remainder && i<M; ++i )
1884 const size_t jbegin( ( IsUpper<MT1>::value )
1885 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1887 const size_t jend( ( IsLower<MT1>::value )
1888 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1894 for(
size_t j=jbegin; j<jend; ++j ) {
1895 value += A(i,j) * x[j];
// Large-matrix subtraction assignment kernel, overload guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >: it simply forwards to
// the default subtraction assignment kernel.
1918 template<
typename VT1
1921 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1922 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1924 selectDefaultSubAssignKernel( y, A, x );
1944 template<
typename VT1
1947 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1948 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
1950 typedef IntrinsicTrait<ElementType> IT;
1952 const size_t M( A.rows() );
1953 const size_t N( A.columns() );
1955 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1957 const size_t iblock( 32768UL /
sizeof( ElementType ) );
1958 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1962 for(
size_t ii=0U; ii<M; ii+=iblock ) {
1963 for(
size_t jj=0UL; jj<N; jj+=jblock )
1965 const size_t jend(
min( jj+jblock, N ) );
1966 const size_t itmp(
min( ii+iblock, M ) );
1967 const size_t iend( ( IsUpper<MT1>::value )
1968 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1971 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1974 size_t i( ( IsLower<MT1>::value )
1975 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
1980 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1982 for(
size_t j=jj; j<jend; ++j ) {
1983 const IntrinsicType x1(
set( x[j] ) );
1984 xmm1 = xmm1 + A.load(i ,j) * x1;
1985 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
1986 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
1987 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
1988 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
1989 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
1990 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
1991 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
1994 y.store( i , y.load(i ) - xmm1 );
2006 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2008 for(
size_t j=jj; j<jend; ++j ) {
2009 const IntrinsicType x1(
set( x[j] ) );
2010 xmm1 = xmm1 + A.load(i ,j) * x1;
2011 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2012 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2013 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2016 y.store( i , y.load(i ) - xmm1 );
2024 IntrinsicType xmm1, xmm2, xmm3;
2026 for(
size_t j=jj; j<jend; ++j ) {
2027 const IntrinsicType x1(
set( x[j] ) );
2028 xmm1 = xmm1 + A.load(i ,j) * x1;
2029 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2030 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2033 y.store( i , y.load(i ) - xmm1 );
2040 IntrinsicType xmm1, xmm2;
2042 for(
size_t j=jj; j<jend; ++j ) {
2043 const IntrinsicType x1(
set( x[j] ) );
2044 xmm1 = xmm1 + A.load(i ,j) * x1;
2045 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
2048 y.store( i , y.load(i ) - xmm1 );
2056 for(
size_t j=jj; j<jend; ++j ) {
2057 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
2060 y.store( i, y.load(i) - xmm1 );
2063 for( ; remainder && i<iend; ++i )
2067 for(
size_t j=jj; j<jend; ++j ) {
2068 value += A(i,j) * x[j];
// BLAS subtraction assignment kernel, overload guarded by
// DisableIf< UseBlasKernel<VT1,MT1,VT2> >: no BLAS call is made here, the work
// is handed to the large subtraction assignment kernel.
2093 template<
typename VT1
2096 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
2097 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2099 selectLargeSubAssignKernel( y, A, x );
// BLAS subtraction assignment kernel, overload guarded by
// EnableIf< UseBlasKernel<VT1,MT1,VT2> >.
// Triangular matrices: trmv is applied to a separate vector `tmp` (its
// declaration is not visible in this excerpt), which is then subtracted from y
// via subAssign. The other visible call is gemv with the scalar arguments
// ET(-1) and ET(1).
// NOTE(review): presumably alpha = -1 and beta = 1, i.e. y = -A*x + y —
// confirm against the gemv wrapper's signature.
2119 template<
typename VT1
2122 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
2123 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
2127 if( IsTriangular<MT1>::value ) {
2129 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2130 subAssign( y, tmp );
2133 gemv( y, A, x, ET(-1), ET(1) );
// Multiplication assignment of the product expression to a dense vector.
// The expression is evaluated into a temporary of type ResultType (wrapped in
// serial()), and that temporary is then passed to multAssign together with
// ~lhs, so no dedicated product kernel is involved in the visible code.
2157 template<
typename VT1 >
2158 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const TDMatDVecMultExpr& rhs )
2168 const ResultType tmp(
serial( rhs ) );
2169 multAssign( ~lhs, tmp );
2193 template<
typename VT1 >
2194 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2201 if( rhs.mat_.rows() == 0UL ) {
2204 else if( rhs.mat_.columns() == 0UL ) {
2237 template<
typename VT1 >
2238 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2249 const ResultType tmp( rhs );
2270 template<
typename VT1 >
2271 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2278 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2314 template<
typename VT1 >
2315 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2322 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2358 template<
typename VT1 >
2359 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2370 const ResultType tmp( rhs );
2410 template<
typename MT
2414 :
public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
2415 ,
private VecScalarMultExpr
2416 ,
private Computation
2420 typedef TDMatDVecMultExpr<MT,VT> MVM;
// evaluateMatrix: set if the matrix type requires evaluation, or if it is a
// computation whose element type MET equals the vector element type VET and is
// BLAS compatible.
2432 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2433 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// evaluateVector: set if the vector type is a computation or requires
// evaluation.
2438 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// Compile-time switch: `value` is set whenever at least one of the two
// operands is flagged for evaluation (evaluateMatrix / evaluateVector). The
// template parameter T1 does not take part in the visible expression.
2446 template<
typename T1 >
2447 struct UseSMPAssign {
2448 enum { value = ( evaluateMatrix || evaluateVector ) };
// Compile-time test for the BLAS kernels of the scaled product, parameterised
// on four types T1..T4. The visible conditions require:
//  - mutable data access for T1 and const data access for T2 and T3,
//  - T2 not being diagonal,
//  - all of T1, T2, T3 being vectorizable,
//  - BLAS-compatible element types for T1, T2, T3 that are all the same type,
//  - not the combination of a builtin T1 element type with a complex T4.
// NOTE(review): the line opening the enum (and any leading condition on it) is
// not visible in this excerpt.
2456 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2457 struct UseBlasKernel {
2459 HasMutableDataAccess<T1>::value &&
2460 HasConstDataAccess<T2>::value &&
2461 HasConstDataAccess<T3>::value &&
2462 !IsDiagonal<T2>::value &&
2463 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2464 IsBlasCompatible<typename T1::ElementType>::value &&
2465 IsBlasCompatible<typename T2::ElementType>::value &&
2466 IsBlasCompatible<typename T3::ElementType>::value &&
2467 IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
2468 IsSame< typename T1::ElementType, typename T3::ElementType >::value &&
2469 !( IsBuiltin<typename T1::ElementType>::value && IsComplex<T4>::value ) };
// Compile-time test for the vectorized default kernels of the scaled product.
// The visible conditions require that T2 is not diagonal, that T1, T2 and T3
// are vectorizable, that the element types of T1, T2, T3 and the type T4 are
// all the same, and that IntrinsicTrait reports both addition and
// multiplication for that element type.
// NOTE(review): the line opening the enum (and any leading condition on it) is
// not visible in this excerpt.
2478 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2479 struct UseVectorizedDefaultKernel {
2481 !IsDiagonal<T2>::value &&
2482 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2483 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2484 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2485 IsSame<typename T1::ElementType,T4>::value &&
2486 IntrinsicTrait<typename T1::ElementType>::addition &&
2487 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this scaled expression.
2493 typedef DVecScalarMultExpr<MVM,ST,false>
This;
// Result type, obtained from MultTrait applied to RES and the scalar type ST.
2494 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic type associated with ElementType via IntrinsicTrait.
2497 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left-hand side operand is the (const) matrix/vector product expression.
2502 typedef const TDMatDVecMultExpr<MT,VT>
LeftOperand;
// Operand types used inside the assignment functions, selected between
// `const MRT` and MCT (matrix) and between `const VRT` and VCT (vector)
// depending on evaluateMatrix / evaluateVector.
// NOTE(review): presumably SelectType picks its first type when the flag is
// set — confirm against the SelectType definition.
2508 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
LT;
2511 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
RT;
// Compilation flag for vectorization: requires a non-diagonal matrix type,
// vectorizable operands, identical matrix, vector and scalar element types
// (MET, VET, ST), and IntrinsicTrait support for addition and multiplication.
2516 enum { vectorizable = !IsDiagonal<MT>::value &&
2517 MT::vectorizable && VT::vectorizable &&
2518 IsSame<MET,VET>::value &&
2519 IsSame<MET,ST>::value &&
2520 IntrinsicTrait<MET>::addition &&
2521 IntrinsicTrait<MET>::multiplication };
// Compilation flag for SMP assignment: set only if neither operand is flagged
// for evaluation and both operand types report smpAssignable themselves.
2524 enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2525 !evaluateVector && VT::smpAssignable };
2534 explicit inline DVecScalarMultExpr(
const MVM& vector, ST scalar )
2546 inline ReturnType
operator[](
size_t index )
const {
2548 return vector_[index] * scalar_;
2559 inline ReturnType
at(
size_t index )
const {
2560 if( index >= vector_.size() ) {
2563 return (*
this)[index];
2572 inline size_t size()
const {
2573 return vector_.size();
2603 template<
typename T >
2604 inline bool canAlias(
const T* alias )
const {
2605 return vector_.canAlias( alias );
2615 template<
typename T >
2616 inline bool isAliased(
const T* alias )
const {
2617 return vector_.isAliased( alias );
2627 return vector_.isAligned();
2637 typename MVM::LeftOperand A( vector_.leftOperand() );
2639 ( IsComputation<MT>::value && !evaluateMatrix ) ||
2640 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2641 (
size() > SMP_TDMATDVECMULT_THRESHOLD );
2647 LeftOperand vector_;
2648 RightOperand scalar_;
2663 template<
typename VT1 >
2664 friend inline void assign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
2670 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2671 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2673 if( left.rows() == 0UL ) {
2676 else if( left.columns() == 0UL ) {
2689 DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
// Kernel dispatch for the assignment of the scaled matrix/vector product.
// y is the target vector, A the matrix operand, x the vector operand and
// scalar the scaling factor; all four are forwarded unchanged.
// The small kernel is selected when any of the following holds:
//   - MT1 is a diagonal matrix type,
//   - the matrix operand type MT is a computation and evaluateMatrix is false,
//   - A.rows() * A.columns() is below TDMATDVECMULT_THRESHOLD.
// NOTE(review): the line between the two calls (original line 2714) is missing
// from this listing; presumably it is the `else` that routes all remaining
// cases to selectBlasAssignKernel -- confirm against the full header.
2704 template<
typename VT1
2708 static inline void selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2710 if( ( IsDiagonal<MT1>::value ) ||
2711 ( IsComputation<MT>::value && !evaluateMatrix ) ||
2712 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2713 selectSmallAssignKernel( y, A, x, scalar );
2715 selectBlasAssignKernel( y, A, x, scalar );
2733 template<
typename VT1
2737 static inline void selectDefaultAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2739 const size_t M( A.rows() );
2740 const size_t N( A.columns() );
2742 if( IsStrictlyLower<MT1>::value ) {
2746 if( !IsUpper<MT1>::value )
2748 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
2749 y[i] = A(i,0UL) * x[0UL];
2753 for(
size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
2755 if( IsDiagonal<MT1>::value )
2757 y[j] = A(j,j) * x[j] * scalar;
2761 const size_t ibegin( ( IsLower<MT1>::value )
2762 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2764 const size_t iend( ( IsUpper<MT1>::value )
2765 ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
2769 const size_t inum( iend - ibegin );
2770 const size_t ipos( ibegin + ( inum &
size_t(-2) ) );
2772 for(
size_t i=ibegin; i<ipos; i+=2UL ) {
2773 y[i ] += A(i ,j) * x[j];
2774 y[i+1UL] += A(i+1UL,j) * x[j];
2777 y[ipos] += A(ipos,j) * x[j];
2779 if( IsUpper<MT1>::value ) {
2780 y[iend] = A(iend,j) * x[j];
2785 if( IsStrictlyUpper<MT1>::value ) {
2789 if( !IsDiagonal<MT1>::value )
2791 const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
2792 for(
size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
2813 template<
typename VT1
2817 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2818 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2820 selectDefaultAssignKernel( y, A, x, scalar );
2838 template<
typename VT1
2842 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2843 selectSmallAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
2845 typedef IntrinsicTrait<ElementType> IT;
2847 const size_t M( A.rows() );
2848 const size_t N( A.columns() );
2850 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
2852 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
2855 const IntrinsicType factor(
set( scalar ) );
2861 const size_t jbegin( ( IsUpper<MT1>::value )
2862 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2864 const size_t jend( ( IsLower<MT1>::value )
2865 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2869 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2871 for(
size_t j=jbegin; j<jend; ++j ) {
2872 const IntrinsicType x1(
set( x[j] ) );
2873 xmm1 = xmm1 + A.load(i ,j) * x1;
2874 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2875 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2876 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2877 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
2878 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
2879 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
2880 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
2883 y.store( i , xmm1*factor );
2884 y.store( i+
IT::size , xmm2*factor );
2885 y.store( i+
IT::size*2UL, xmm3*factor );
2886 y.store( i+
IT::size*3UL, xmm4*factor );
2887 y.store( i+
IT::size*4UL, xmm5*factor );
2888 y.store( i+
IT::size*5UL, xmm6*factor );
2889 y.store( i+
IT::size*6UL, xmm7*factor );
2890 y.store( i+
IT::size*7UL, xmm8*factor );
2895 const size_t jbegin( ( IsUpper<MT1>::value )
2896 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2898 const size_t jend( ( IsLower<MT1>::value )
2899 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2903 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2905 for(
size_t j=jbegin; j<jend; ++j ) {
2906 const IntrinsicType x1(
set( x[j] ) );
2907 xmm1 = xmm1 + A.load(i ,j) * x1;
2908 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2909 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2910 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
2913 y.store( i , xmm1*factor );
2914 y.store( i+
IT::size , xmm2*factor );
2915 y.store( i+
IT::size*2UL, xmm3*factor );
2916 y.store( i+
IT::size*3UL, xmm4*factor );
2921 const size_t jbegin( ( IsUpper<MT1>::value )
2922 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2924 const size_t jend( ( IsLower<MT1>::value )
2925 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2929 IntrinsicType xmm1, xmm2, xmm3;
2931 for(
size_t j=jbegin; j<jend; ++j ) {
2932 const IntrinsicType x1(
set( x[j] ) );
2933 xmm1 = xmm1 + A.load(i ,j) * x1;
2934 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
2935 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
2938 y.store( i , xmm1*factor );
2939 y.store( i+
IT::size , xmm2*factor );
2940 y.store( i+
IT::size*2UL, xmm3*factor );
2945 const size_t jbegin( ( IsUpper<MT1>::value )
2946 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2948 const size_t jend( ( IsLower<MT1>::value )
2949 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2953 IntrinsicType xmm1, xmm2;
2955 for(
size_t j=jbegin; j<jend; ++j ) {
2956 const IntrinsicType x1(
set( x[j] ) );
2957 xmm1 = xmm1 + A.load(i ,j) * x1;
2958 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
2961 y.store( i , xmm1*factor );
2962 y.store( i+
IT::size, xmm2*factor );
2967 const size_t jbegin( ( IsUpper<MT1>::value )
2968 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2970 const size_t jend( ( IsLower<MT1>::value )
2971 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2977 for(
size_t j=jbegin; j<jend; ++j ) {
2978 const IntrinsicType x1(
set( x[j] ) );
2979 xmm1 = xmm1 + A.load(i,j) * x1;
2982 y.store( i, xmm1*factor );
2985 for( ; remainder && i<M; ++i )
2987 const size_t jbegin( ( IsUpper<MT1>::value )
2988 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2990 const size_t jend( ( IsLower<MT1>::value )
2991 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2997 for(
size_t j=jbegin; j<jend; ++j ) {
2998 value += A(i,j) * x[j];
3001 y[i] = value * scalar;
3020 template<
typename VT1
3024 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3025 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3027 selectDefaultAssignKernel( y, A, x, scalar );
3045 template<
typename VT1
3049 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3050 selectLargeAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3052 typedef IntrinsicTrait<ElementType> IT;
3054 const size_t M( A.rows() );
3055 const size_t N( A.columns() );
3057 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3059 const size_t iblock( 32768UL /
sizeof( ElementType ) );
3060 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3064 const IntrinsicType factor(
set( scalar ) );
3068 for(
size_t ii=0U; ii<M; ii+=iblock ) {
3069 for(
size_t jj=0UL; jj<N; jj+=jblock )
3071 const size_t jend(
min( jj+jblock, N ) );
3072 const size_t itmp(
min( ii+iblock, M ) );
3073 const size_t iend( ( IsUpper<MT1>::value )
3074 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3077 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
3080 size_t i( ( IsLower<MT1>::value )
3081 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
3086 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3088 for(
size_t j=jj; j<jend; ++j ) {
3089 const IntrinsicType x1(
set( x[j] ) );
3090 xmm1 = xmm1 + A.load(i ,j) * x1;
3091 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3092 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3093 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3094 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3095 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3096 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3097 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3100 y.store( i , y.load(i ) + xmm1*factor );
3112 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3114 for(
size_t j=jj; j<jend; ++j ) {
3115 const IntrinsicType x1(
set( x[j] ) );
3116 xmm1 = xmm1 + A.load(i ,j) * x1;
3117 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3118 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3119 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3122 y.store( i , y.load(i ) + xmm1*factor );
3130 IntrinsicType xmm1, xmm2, xmm3;
3132 for(
size_t j=jj; j<jend; ++j ) {
3133 const IntrinsicType x1(
set( x[j] ) );
3134 xmm1 = xmm1 + A.load(i ,j) * x1;
3135 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3136 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3139 y.store( i , y.load(i ) + xmm1*factor );
3146 IntrinsicType xmm1, xmm2;
3148 for(
size_t j=jj; j<jend; ++j ) {
3149 const IntrinsicType x1(
set( x[j] ) );
3150 xmm1 = xmm1 + A.load(i ,j) * x1;
3151 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3154 y.store( i , y.load(i ) + xmm1*factor );
3162 for(
size_t j=jj; j<jend; ++j ) {
3163 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3166 y.store( i, y.load(i) + xmm1*factor );
3169 for( ; remainder && i<iend; ++i )
3173 for(
size_t j=jj; j<jend; ++j ) {
3174 value += A(i,j) * x[j];
3177 y[i] += value * scalar;
3198 template<
typename VT1
3202 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3203 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3205 selectLargeAssignKernel( y, A, x, scalar );
// BLAS-based assignment kernel; this overload participates only when
// UseBlasKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf in the return type).
// Triangular MT1: y is first assigned scalar * x and then passed to trmv
// together with A; the CblasLower/CblasUpper flag is chosen via IsLower<MT1>.
// Otherwise: a single gemv call with ET(scalar) and ET(0) as the two factors.
// NOTE(review): the typedef of ET and the branch braces/else are not visible in
// this listing; ET is presumably the element type of VT1, and gemv presumably
// computes y = alpha*A*x + beta*y with alpha = scalar, beta = 0 -- confirm.
3224 template<
typename VT1
3228 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3229 selectBlasAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3233 if( IsTriangular<MT1>::value ) {
3234 assign( y, scalar * x );
3235 trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3238 gemv( y, A, x, ET(scalar), ET(0) );
// Assignment of the scaled matrix/vector product to a sparse vector.
// The expression rhs is wrapped in serial() and materialized into a temporary
// of type ResultType; that temporary is then assigned to the unwrapped target
// (~lhs), so the sparse target never sees the expression itself.
3256 template<
typename VT1 >
3257 friend inline void assign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3267 const ResultType tmp(
serial( rhs ) );
3268 assign( ~lhs, tmp );
3284 template<
typename VT1 >
3285 friend inline void addAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3291 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3292 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3294 if( left.rows() == 0UL || left.columns() == 0UL ) {
3306 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3321 template<
typename VT1
3325 static inline void selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3327 if( ( IsDiagonal<MT1>::value ) ||
3328 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3329 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3330 selectSmallAddAssignKernel( y, A, x, scalar );
3332 selectBlasAddAssignKernel( y, A, x, scalar );
// Default (non-vectorized) addition-assignment kernel: forwards the expression
// A * x * scalar to the addAssign member of the target vector y.
3350 template<
typename VT1
3354 static inline void selectDefaultAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3356 y.addAssign( A * x * scalar );
3374 template<
typename VT1
3378 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3379 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3381 selectDefaultAddAssignKernel( y, A, x, scalar );
3400 template<
typename VT1
3404 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3405 selectSmallAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3407 typedef IntrinsicTrait<ElementType> IT;
3409 const size_t M( A.rows() );
3410 const size_t N( A.columns() );
3412 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3414 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
3417 const IntrinsicType factor(
set( scalar ) );
3423 const size_t jbegin( ( IsUpper<MT1>::value )
3424 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3426 const size_t jend( ( IsLower<MT1>::value )
3427 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3431 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3433 for(
size_t j=jbegin; j<jend; ++j ) {
3434 const IntrinsicType x1(
set( x[j] ) );
3435 xmm1 = xmm1 + A.load(i ,j) * x1;
3436 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3437 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3438 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3439 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3440 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3441 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3442 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3445 y.store( i , y.load(i ) + xmm1*factor );
3457 const size_t jbegin( ( IsUpper<MT1>::value )
3458 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3460 const size_t jend( ( IsLower<MT1>::value )
3461 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3465 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3467 for(
size_t j=jbegin; j<jend; ++j ) {
3468 const IntrinsicType x1(
set( x[j] ) );
3469 xmm1 = xmm1 + A.load(i ,j) * x1;
3470 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3471 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3472 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3475 y.store( i , y.load(i ) + xmm1*factor );
3483 const size_t jbegin( ( IsUpper<MT1>::value )
3484 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3486 const size_t jend( ( IsLower<MT1>::value )
3487 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3491 IntrinsicType xmm1, xmm2, xmm3;
3493 for(
size_t j=jbegin; j<jend; ++j ) {
3494 const IntrinsicType x1(
set( x[j] ) );
3495 xmm1 = xmm1 + A.load(i ,j) * x1;
3496 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3497 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3500 y.store( i , y.load(i ) + xmm1*factor );
3507 const size_t jbegin( ( IsUpper<MT1>::value )
3508 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3510 const size_t jend( ( IsLower<MT1>::value )
3511 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3515 IntrinsicType xmm1, xmm2;
3517 for(
size_t j=jbegin; j<jend; ++j ) {
3518 const IntrinsicType x1(
set( x[j] ) );
3519 xmm1 = xmm1 + A.load(i ,j) * x1;
3520 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3523 y.store( i , y.load(i ) + xmm1*factor );
3529 const size_t jbegin( ( IsUpper<MT1>::value )
3530 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3532 const size_t jend( ( IsLower<MT1>::value )
3533 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3539 for(
size_t j=jbegin; j<jend; ++j ) {
3540 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3543 y.store( i, y.load(i) + xmm1*factor );
3546 for( ; remainder && i<M; ++i )
3548 const size_t jbegin( ( IsUpper<MT1>::value )
3549 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3551 const size_t jend( ( IsLower<MT1>::value )
3552 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3558 for(
size_t j=jbegin; j<jend; ++j ) {
3559 value += A(i,j) * x[j];
3562 y[i] += value * scalar;
3581 template<
typename VT1
3585 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3586 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3588 selectDefaultAddAssignKernel( y, A, x, scalar );
3607 template<
typename VT1
3611 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3612 selectLargeAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3614 typedef IntrinsicTrait<ElementType> IT;
3616 const size_t M( A.rows() );
3617 const size_t N( A.columns() );
3619 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3621 const size_t iblock( 32768UL /
sizeof( ElementType ) );
3622 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3626 const IntrinsicType factor(
set( scalar ) );
3628 for(
size_t ii=0U; ii<M; ii+=iblock ) {
3629 for(
size_t jj=0UL; jj<N; jj+=jblock )
3631 const size_t jend(
min( jj+jblock, N ) );
3632 const size_t itmp(
min( ii+iblock, M ) );
3633 const size_t iend( ( IsUpper<MT1>::value )
3634 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3637 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
3640 size_t i( ( IsLower<MT1>::value )
3641 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
3646 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3648 for(
size_t j=jj; j<jend; ++j ) {
3649 const IntrinsicType x1(
set( x[j] ) );
3650 xmm1 = xmm1 + A.load(i ,j) * x1;
3651 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3652 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3653 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3654 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3655 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3656 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3657 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3660 y.store( i , y.load(i ) + xmm1*factor );
3672 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3674 for(
size_t j=jj; j<jend; ++j ) {
3675 const IntrinsicType x1(
set( x[j] ) );
3676 xmm1 = xmm1 + A.load(i ,j) * x1;
3677 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3678 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3679 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3682 y.store( i , y.load(i ) + xmm1*factor );
3690 IntrinsicType xmm1, xmm2, xmm3;
3692 for(
size_t j=jj; j<jend; ++j ) {
3693 const IntrinsicType x1(
set( x[j] ) );
3694 xmm1 = xmm1 + A.load(i ,j) * x1;
3695 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3696 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3699 y.store( i , y.load(i ) + xmm1*factor );
3706 IntrinsicType xmm1, xmm2;
3708 for(
size_t j=jj; j<jend; ++j ) {
3709 const IntrinsicType x1(
set( x[j] ) );
3710 xmm1 = xmm1 + A.load(i ,j) * x1;
3711 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
3714 y.store( i , y.load(i ) + xmm1*factor );
3722 for(
size_t j=jj; j<jend; ++j ) {
3723 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
3726 y.store( i, y.load(i) + xmm1*factor );
3729 for( ; remainder && i<iend; ++i )
3733 for(
size_t j=jj; j<jend; ++j ) {
3734 value += A(i,j) * x[j];
3737 y[i] += value * scalar;
3758 template<
typename VT1
3762 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3763 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3765 selectLargeAddAssignKernel( y, A, x, scalar );
// BLAS-based addition-assignment kernel; this overload participates only when
// UseBlasKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf in the return type).
// Triangular MT1: trmv is applied to a temporary tmp together with A (flag
// chosen via IsLower<MT1>), and tmp is then added to y through addAssign.
// Otherwise: a single gemv call with ET(scalar) and ET(1) as the two factors.
// NOTE(review): the declaration of tmp (original line 3794), the typedef of ET
// and the else/braces are not visible in this listing. tmp presumably starts
// out as scalar * x, and gemv presumably computes y = scalar*A*x + 1*y --
// confirm against the full header.
3784 template<
typename VT1
3788 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3789 selectBlasAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3793 if( IsTriangular<MT1>::value ) {
3795 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3796 addAssign( y, tmp );
3799 gemv( y, A, x, ET(scalar), ET(1) );
3821 template<
typename VT1 >
3822 friend inline void subAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
3828 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3829 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3831 if( left.rows() == 0UL || left.columns() == 0UL ) {
3843 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3858 template<
typename VT1
3862 static inline void selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3864 if( ( IsDiagonal<MT1>::value ) ||
3865 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3866 ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3867 selectSmallSubAssignKernel( y, A, x, scalar );
3869 selectBlasSubAssignKernel( y, A, x, scalar );
// Default (non-vectorized) subtraction-assignment kernel: forwards the
// expression A * x * scalar to the subAssign member of the target vector y.
3887 template<
typename VT1
3891 static inline void selectDefaultSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3893 y.subAssign( A * x * scalar );
3911 template<
typename VT1
3915 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3916 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3918 selectDefaultSubAssignKernel( y, A, x, scalar );
3937 template<
typename VT1
3941 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3942 selectSmallSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
3944 typedef IntrinsicTrait<ElementType> IT;
3946 const size_t M( A.rows() );
3947 const size_t N( A.columns() );
3949 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3951 const size_t ipos( remainder ? ( M &
size_t(-
IT::size) ) : M );
3954 const IntrinsicType factor(
set( scalar ) );
3960 const size_t jbegin( ( IsUpper<MT1>::value )
3961 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3963 const size_t jend( ( IsLower<MT1>::value )
3964 ?(
min( i+
IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3968 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3970 for(
size_t j=jbegin; j<jend; ++j ) {
3971 const IntrinsicType x1(
set( x[j] ) );
3972 xmm1 = xmm1 + A.load(i ,j) * x1;
3973 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
3974 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
3975 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
3976 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
3977 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
3978 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
3979 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
3982 y.store( i , y.load(i ) - xmm1*factor );
3994 const size_t jbegin( ( IsUpper<MT1>::value )
3995 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3997 const size_t jend( ( IsLower<MT1>::value )
3998 ?(
min( i+
IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4002 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4004 for(
size_t j=jbegin; j<jend; ++j ) {
4005 const IntrinsicType x1(
set( x[j] ) );
4006 xmm1 = xmm1 + A.load(i ,j) * x1;
4007 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4008 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4009 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4012 y.store( i , y.load(i ) - xmm1*factor );
4020 const size_t jbegin( ( IsUpper<MT1>::value )
4021 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4023 const size_t jend( ( IsLower<MT1>::value )
4024 ?(
min( i+
IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4028 IntrinsicType xmm1, xmm2, xmm3;
4030 for(
size_t j=jbegin; j<jend; ++j ) {
4031 const IntrinsicType x1(
set( x[j] ) );
4032 xmm1 = xmm1 + A.load(i ,j) * x1;
4033 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4034 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4037 y.store( i , y.load(i ) - xmm1*factor );
4044 const size_t jbegin( ( IsUpper<MT1>::value )
4045 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4047 const size_t jend( ( IsLower<MT1>::value )
4048 ?(
min( i+
IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4052 IntrinsicType xmm1, xmm2;
4054 for(
size_t j=jbegin; j<jend; ++j ) {
4055 const IntrinsicType x1(
set( x[j] ) );
4056 xmm1 = xmm1 + A.load(i ,j) * x1;
4057 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
4060 y.store( i , y.load(i ) - xmm1*factor );
4066 const size_t jbegin( ( IsUpper<MT1>::value )
4067 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4069 const size_t jend( ( IsLower<MT1>::value )
4070 ?(
min( i+
IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4076 for(
size_t j=jbegin; j<jend; ++j ) {
4077 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
4080 y.store( i, y.load(i) - xmm1*factor );
4083 for( ; remainder && i<M; ++i )
4085 const size_t jbegin( ( IsUpper<MT1>::value )
4086 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4088 const size_t jend( ( IsLower<MT1>::value )
4089 ?(
min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4095 for(
size_t j=jbegin; j<jend; ++j ) {
4096 value += A(i,j) * x[j];
4099 y[i] -= value * scalar;
4118 template<
typename VT1
4122 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4123 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4125 selectDefaultSubAssignKernel( y, A, x, scalar );
4144 template<
typename VT1
4148 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4149 selectLargeSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4151 typedef IntrinsicTrait<ElementType> IT;
4153 const size_t M( A.rows() );
4154 const size_t N( A.columns() );
4156 const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4158 const size_t iblock( 32768UL /
sizeof( ElementType ) );
4159 const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4163 const IntrinsicType factor(
set( scalar ) );
4165 for(
size_t ii=0U; ii<M; ii+=iblock ) {
4166 for(
size_t jj=0UL; jj<N; jj+=jblock )
4168 const size_t jend(
min( jj+jblock, N ) );
4169 const size_t itmp(
min( ii+iblock, M ) );
4170 const size_t iend( ( IsUpper<MT1>::value )
4171 ?(
min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
4174 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
4177 size_t i( ( IsLower<MT1>::value )
4178 ?(
max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) &
size_t(-
IT::size) ) )
4183 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4185 for(
size_t j=jj; j<jend; ++j ) {
4186 const IntrinsicType x1(
set( x[j] ) );
4187 xmm1 = xmm1 + A.load(i ,j) * x1;
4188 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4189 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4190 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4191 xmm5 = xmm5 + A.load(i+
IT::size*4UL,j) * x1;
4192 xmm6 = xmm6 + A.load(i+
IT::size*5UL,j) * x1;
4193 xmm7 = xmm7 + A.load(i+
IT::size*6UL,j) * x1;
4194 xmm8 = xmm8 + A.load(i+
IT::size*7UL,j) * x1;
4197 y.store( i , y.load(i ) - xmm1*factor );
4209 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4211 for(
size_t j=jj; j<jend; ++j ) {
4212 const IntrinsicType x1(
set( x[j] ) );
4213 xmm1 = xmm1 + A.load(i ,j) * x1;
4214 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4215 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4216 xmm4 = xmm4 + A.load(i+
IT::size*3UL,j) * x1;
4219 y.store( i , y.load(i ) - xmm1*factor );
4227 IntrinsicType xmm1, xmm2, xmm3;
4229 for(
size_t j=jj; j<jend; ++j ) {
4230 const IntrinsicType x1(
set( x[j] ) );
4231 xmm1 = xmm1 + A.load(i ,j) * x1;
4232 xmm2 = xmm2 + A.load(i+
IT::size ,j) * x1;
4233 xmm3 = xmm3 + A.load(i+
IT::size*2UL,j) * x1;
4236 y.store( i , y.load(i ) - xmm1*factor );
4243 IntrinsicType xmm1, xmm2;
4245 for(
size_t j=jj; j<jend; ++j ) {
4246 const IntrinsicType x1(
set( x[j] ) );
4247 xmm1 = xmm1 + A.load(i ,j) * x1;
4248 xmm2 = xmm2 + A.load(i+
IT::size,j) * x1;
4251 y.store( i , y.load(i ) - xmm1*factor );
4259 for(
size_t j=jj; j<jend; ++j ) {
4260 xmm1 = xmm1 + A.load(i,j) *
set( x[j] );
4263 y.store( i, y.load(i) - xmm1*factor );
4266 for( ; remainder && i<iend; ++i )
4270 for(
size_t j=jj; j<jend; ++j ) {
4271 value += A(i,j) * x[j];
4274 y[i] -= value * scalar;
4295 template<
typename VT1
4299 static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
4300 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4302 selectLargeSubAssignKernel( y, A, x, scalar );
// BLAS-based subtraction-assignment kernel; this overload participates only
// when UseBlasKernel<VT1,MT1,VT2,ST2> is satisfied (EnableIf in the return type).
// Triangular MT1: trmv is applied to a temporary tmp together with A (flag
// chosen via IsLower<MT1>), and tmp is then subtracted from y via subAssign.
// Otherwise: a single gemv call with ET(-scalar) and ET(1) as the two factors,
// i.e. the subtraction is expressed by negating the scalar.
// NOTE(review): the declaration of tmp (original line 4331), the typedef of ET
// and the else/braces are not visible in this listing. tmp presumably starts
// out as scalar * x, and gemv presumably computes y = -scalar*A*x + 1*y --
// confirm against the full header.
4321 template<
typename VT1
4325 static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
4326 selectBlasSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x, ST2 scalar )
4330 if( IsTriangular<MT1>::value ) {
4332 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4333 subAssign( y, tmp );
4336 gemv( y, A, x, ET(-scalar), ET(1) );
// Multiplication-assignment of the scaled matrix/vector product to a dense
// vector. The expression rhs is wrapped in serial() and materialized into a
// temporary of type ResultType, which is then handed to multAssign together
// with the unwrapped target (~lhs).
4358 template<
typename VT1 >
4359 friend inline void multAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4369 const ResultType tmp(
serial( rhs ) );
4370 multAssign( ~lhs, tmp );
4392 template<
typename VT1 >
4393 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4394 smpAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4400 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4401 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4403 if( left.rows() == 0UL ) {
4406 else if( left.columns() == 0UL ) {
4437 template<
typename VT1 >
4438 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4439 smpAssign( SparseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4449 const ResultType tmp( rhs );
4468 template<
typename VT1 >
4469 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4470 smpAddAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4476 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4477 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4479 if( left.rows() == 0UL || left.columns() == 0UL ) {
4513 template<
typename VT1 >
4514 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4515 smpSubAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4521 typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4522 typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4524 if( left.rows() == 0UL || left.columns() == 0UL ) {
4559 template<
typename VT1 >
4560 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4561 smpMultAssign( DenseVector<VT1,false>& lhs,
const DVecScalarMultExpr& rhs )
4571 const ResultType tmp( rhs );
4634 template<
typename T1
4636 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
4660 template<
typename MT,
typename VT >
4677 template<
typename MT,
typename VT >
4679 :
public IsTrue< And< IsAligned<MT>, IsAligned<VT> >::value >
4695 template<
typename MT,
typename VT,
bool AF >
4700 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type
4701 ,
typename SubvectorExprTrait<const VT,AF>::Type >::Type Type;
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:7820
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: ColumnVector.h:79
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:200
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:127
System settings for performance optimizations.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:204
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:139
Constraint on the data type.
Header file for the IsComplexDouble type trait.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:125
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:218
Constraint on the transpose flag of vector types.
Compile time check for low-level access to mutable data. This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:319
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:353
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:261
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:79
Header file for the IsMatMatMultExpr type trait class.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:209
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:130
Header file for the serial shim.
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:240
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:254
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:329
Header file for the HasConstDataAccess type trait.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:203
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:128
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:138
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:341
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:126
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:215
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:212
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:202
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:309
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:201
Constraints on the storage order of matrix types.
Header file for the HasMutableDataAccess type trait.
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:129
Header file for all intrinsic functionality.
Header file for BLAS general matrix/vector multiplication functions (gemv)
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:373
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:258
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDMatDVecMultExpr.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the IsComplexFloat type trait.
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:324
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the IsUpper type trait.
Header file for exception macros.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:383
Header file for the MatVecMultExpr base class.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:384
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:206
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:205
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:363
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.