35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
// Head of the TDVecDMatMultExpr expression class. It derives publicly from
// DenseVector< TDVecDMatMultExpr<VT,MT>, true > and privately from
// TVecMatMultExpr and Computation.
// NOTE(review): only the VT template parameter is visible in this excerpt;
// MT is used in the base class list, so its declaration is presumably on an
// omitted line - confirm against the full header.
118 template<
typename VT
120 class TDVecDMatMultExpr :
public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
121 ,
private TVecMatMultExpr
122 ,
private Computation
// Helper trait used with EnableIf on the SMP assignment overloads further
// down. `value` is set whenever the vector or the matrix operand has to be
// evaluated (evaluateVector || evaluateMatrix). The template parameter T1
// does not take part in the visible expression.
151 template<
typename T1 >
152 struct UseSMPAssign {
153 enum { value = ( evaluateVector || evaluateMatrix ) };
// Helper trait consulted (via EnableIf/DisableIf) by the select*Blas*Kernel
// overloads. In those uses T1 is the target vector type, T2 the left-hand
// vector type and T3 the matrix type. All visible conditions are AND-ed.
// NOTE(review): the line opening the enum is not part of this excerpt, so
// there may be an additional leading condition.
163 template<
typename T1,
typename T2,
typename T3 >
164 struct UseBlasKernel {
// Target must expose mutable data access, both operands const data access.
166 HasMutableDataAccess<T1>::value &&
167 HasConstDataAccess<T2>::value &&
168 HasConstDataAccess<T3>::value &&
// Diagonal matrices are excluded.
169 !IsDiagonal<T3>::value &&
170 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Every element type has to be BLAS compatible ...
171 IsBlasCompatible<typename T1::ElementType>::value &&
172 IsBlasCompatible<typename T2::ElementType>::value &&
173 IsBlasCompatible<typename T3::ElementType>::value &&
// ... and all three element types have to be identical.
174 IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
175 IsSame< typename T1::ElementType, typename T3::ElementType >::value };
// Helper trait that selects between the scalar default kernels and the
// intrinsics-based kernels (see the EnableIf/DisableIf pairs on the
// selectSmall*/selectLarge* kernels). All visible conditions are AND-ed.
// NOTE(review): the line opening the enum is not part of this excerpt.
186 template<
typename T1,
typename T2,
typename T3 >
187 struct UseVectorizedDefaultKernel {
// Diagonal matrices are excluded.
189 !IsDiagonal<T3>::value &&
190 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// All three element types must be identical.
191 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
// The element type must support intrinsic addition and multiplication.
193 IntrinsicTrait<typename T1::ElementType>::addition &&
194 IntrinsicTrait<typename T1::ElementType>::multiplication };
225 VT::vectorizable && MT::vectorizable &&
// Compile-time flag: set only if neither operand requires an intermediate
// evaluation and both operand types report smpAssignable themselves.
231 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
232 !evaluateMatrix && MT::smpAssignable };
264 return vec_[index] *
mat_(index,index);
274 const size_t inum( iend - ibegin );
275 const size_t ipos( ibegin + ( ( inum - 1UL ) &
size_t(-2) ) + 1UL );
277 ElementType res(
vec_[ibegin] *
mat_(ibegin,index) );
279 for(
size_t i=ibegin+1UL; i<ipos; i+=2UL ) {
283 res +=
vec_[ipos] *
mat_(ipos,index);
// Checked element access. The index is compared against mat_.columns(); for
// an in-range index the call forwards to the subscript operator.
// NOTE(review): the body of the out-of-range branch is not visible in this
// excerpt.
297 inline ReturnType
at(
size_t index )
const {
298 if( index >=
mat_.columns() ) {
301 return (*
this)[index];
311 return mat_.columns();
341 template<
typename T >
343 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
353 template<
typename T >
355 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
365 return vec_.isAligned() &&
mat_.isAligned();
377 (
mat_.rows() *
mat_.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
378 (
size() > SMP_TDVECDMATMULT_THRESHOLD );
401 template<
typename VT1 >
408 if( rhs.mat_.rows() == 0UL ) {
412 else if( rhs.mat_.columns() == 0UL ) {
416 LT x(
serial( rhs.vec_ ) );
417 RT A(
serial( rhs.mat_ ) );
424 TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
// Kernel dispatcher for the plain assignment y = x * A. The visible part of
// the condition sends products with fewer than TDVECDMATMULT_THRESHOLD matrix
// elements (rows * columns) to the small kernel; the other visible call goes
// to the BLAS kernel selector.
// NOTE(review): the leading part of the condition and the `else` keyword are
// on lines that are not part of this excerpt.
440 template<
typename VT1
443 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
447 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
448 selectSmallAssignKernel( y, x, A );
450 selectBlasAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for the assignment y = x * A. The matrix is
// traversed row by row; the structural traits (IsLower/IsUpper/IsStrictly*/
// IsDiagonal) of MT1 narrow the column range that is touched per row.
// NOTE(review): several guards and closing braces are missing from this
// excerpt; the comments below only describe the statements that are visible.
469 template<
typename VT1
472 static inline void selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
474 const size_t M( A.rows() );
475 const size_t N( A.columns() );
477 if( IsStrictlyUpper<MT1>::value ) {
481 if( !IsLower<MT1>::value )
// Row 0 is written with '=' (not '+='), i.e. it initializes y for the
// columns starting at jbegin (column 0 is skipped for strictly upper A).
483 const size_t jbegin( IsStrictlyUpper<MT1>::value ? 1UL : 0UL );
484 for(
size_t j=jbegin; j<N; ++j ) {
485 y[j] = x[0UL] * A(0UL,j);
// Remaining rows. The loop starts at row 0 only for lower matrices that are
// not strictly lower; in every other case it starts at row 1.
489 for(
size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
491 if( IsDiagonal<MT1>::value )
// Diagonal matrix: a single product per row, assigned directly.
493 y[i] = x[i] * A(i,i);
// Column range of row i: starts at i (i+1 for strictly upper) for upper
// matrices, ends at i (i-1 for strictly lower) for lower matrices.
497 const size_t jbegin( ( IsUpper<MT1>::value )
498 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
500 const size_t jend( ( IsLower<MT1>::value )
501 ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
// jpos = jbegin + (jnum rounded down to an even count): end of the part that
// is processed two columns at a time.
505 const size_t jnum( jend - jbegin );
506 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
508 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
509 y[j ] += x[i] * A(i,j );
510 y[j+1UL] += x[i] * A(i,j+1UL);
// Left-over column of the two-way unrolled loop (its guard is not visible).
513 y[jpos] += x[i] * A(i,jpos);
// For lower matrices the element at index jend is assigned with '=' rather
// than accumulated.
515 if( IsLower<MT1>::value ) {
516 y[jend] = x[i] * A(i,jend);
521 if( IsStrictlyLower<MT1>::value ) {
// Small-matrix assignment kernel, non-vectorized overload: guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and simply forwards to
// the scalar default kernel.
542 template<
typename VT1
545 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
546 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
548 selectDefaultAssignKernel( y, x, A );
// Small-matrix assignment kernel, vectorized overload (enabled through
// EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). Columns are handled
// in groups of 8, 4, 3, 2 and 1 intrinsic vectors, followed by a scalar
// remainder loop.
// NOTE(review): the column loop headers and the stores into y are on lines
// that are not part of this excerpt.
567 template<
typename VT1
570 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
571 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
573 typedef IntrinsicTrait<ElementType> IT;
575 const size_t M( A.rows() );
576 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
578 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// With a remainder, jpos is N with the low bits cleared by the mask
// size_t(-IT::size); for a power-of-two IT::size this rounds N down to a
// multiple of IT::size.
580 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
// --- 8 intrinsic vectors per step -------------------------------------
// Row range: lower matrices start at row j (j+1 if strictly lower); upper
// matrices end at min(j+IT::size*8, M), minus one row if strictly upper.
587 const size_t ibegin( ( IsLower<MT1>::value )
588 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
590 const size_t iend( ( IsUpper<MT1>::value )
591 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
595 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
597 for(
size_t i=ibegin; i<iend; ++i ) {
// x[i] is broadcast once and multiplied with eight consecutive vectors of
// row i.
598 const IntrinsicType x1(
set( x[i] ) );
599 xmm1 = xmm1 + x1 * A.load(i,j );
600 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
601 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
602 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
603 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
604 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
605 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
606 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// --- 4 intrinsic vectors per step -------------------------------------
621 const size_t ibegin( ( IsLower<MT1>::value )
622 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
624 const size_t iend( ( IsUpper<MT1>::value )
625 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
629 IntrinsicType xmm1, xmm2, xmm3, xmm4;
631 for(
size_t i=ibegin; i<iend; ++i ) {
632 const IntrinsicType x1(
set( x[i] ) );
633 xmm1 = xmm1 + x1 * A.load(i,j );
634 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
635 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
636 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
// --- 3 intrinsic vectors per step -------------------------------------
647 const size_t ibegin( ( IsLower<MT1>::value )
648 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
650 const size_t iend( ( IsUpper<MT1>::value )
651 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
655 IntrinsicType xmm1, xmm2, xmm3;
657 for(
size_t i=ibegin; i<iend; ++i ) {
658 const IntrinsicType x1(
set( x[i] ) );
659 xmm1 = xmm1 + x1 * A.load(i,j );
660 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
661 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
// --- 2 intrinsic vectors per step -------------------------------------
671 const size_t ibegin( ( IsLower<MT1>::value )
672 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
674 const size_t iend( ( IsUpper<MT1>::value )
675 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
679 IntrinsicType xmm1, xmm2;
681 for(
size_t i=ibegin; i<iend; ++i ) {
682 const IntrinsicType x1(
set( x[i] ) );
683 xmm1 = xmm1 + x1 * A.load(i,j );
684 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
// --- 1 intrinsic vector per step --------------------------------------
693 const size_t ibegin( ( IsLower<MT1>::value )
694 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
696 const size_t iend( ( IsUpper<MT1>::value )
697 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
703 for(
size_t i=ibegin; i<iend; ++i ) {
704 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// --- scalar remainder: one column at a time, only if `remainder` ------
710 for( ; remainder && j<N; ++j )
712 const size_t ibegin( ( IsLower<MT1>::value )
713 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
715 const size_t iend( ( IsUpper<MT1>::value )
716 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
// Dot product of x with column j, accumulated in `value`.
722 for(
size_t i=ibegin; i<iend; ++i ) {
723 value += x[i] * A(i,j);
// Large-matrix assignment kernel, non-vectorized overload: guarded by
// DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and simply forwards to
// the scalar default kernel.
746 template<
typename VT1
749 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
750 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
752 selectDefaultAssignKernel( y, x, A );
// Large-matrix assignment kernel, vectorized overload (enabled through
// EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). The matrix is
// processed in blocks of iblock rows by jblock columns; the partial sums of
// every block are added onto the current content of y.
// NOTE(review): the inner column loop headers, most stores and any initial
// reset of y are on lines that are not part of this excerpt.
771 template<
typename VT1
774 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
775 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
777 typedef IntrinsicTrait<ElementType> IT;
779 const size_t M( A.rows() );
780 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
782 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// Block sizes: jblock is the number of elements that fit into 32768 bytes;
// iblock is 8 rows if the whole row is shorter than one column block,
// otherwise 4 rows.
784 const size_t jblock( 32768UL /
sizeof( ElementType ) );
785 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
791 for(
size_t jj=0U; jj<N; jj+=jblock ) {
792 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Clip the current block to the matrix bounds.
794 const size_t iend(
min( ii+iblock, M ) );
795 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally limited to iend
// (iend-1 for strictly lower).
796 const size_t jend( ( IsLower<MT1>::value )
797 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// End of the vectorized column range (jend with the low bits cleared by the
// mask size_t(-IT::size) when a remainder pass is required).
800 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
// For upper matrices the first column is the larger of jj and the masked
// row index ii (ii+1 for strictly upper).
803 size_t j( ( IsUpper<MT1>::value )
804 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 intrinsic vectors per step -------------------------------------
809 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
811 for(
size_t i=ii; i<iend; ++i ) {
812 const IntrinsicType x1(
set( x[i] ) );
813 xmm1 = xmm1 + x1 * A.load(i,j );
814 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
815 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
816 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
817 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
818 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
819 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
820 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// The block result is added to what y already holds at this position.
823 y.store( j , y.load(j ) + xmm1 );
// --- 4 intrinsic vectors per step -------------------------------------
835 IntrinsicType xmm1, xmm2, xmm3, xmm4;
837 for(
size_t i=ii; i<iend; ++i ) {
838 const IntrinsicType x1(
set( x[i] ) );
839 xmm1 = xmm1 + x1 * A.load(i,j );
840 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
841 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
842 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
845 y.store( j , y.load(j ) + xmm1 );
// --- 3 intrinsic vectors per step -------------------------------------
853 IntrinsicType xmm1, xmm2, xmm3;
855 for(
size_t i=ii; i<iend; ++i ) {
856 const IntrinsicType x1(
set( x[i] ) );
857 xmm1 = xmm1 + x1 * A.load(i,j );
858 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
859 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
862 y.store( j , y.load(j ) + xmm1 );
// --- 2 intrinsic vectors per step -------------------------------------
869 IntrinsicType xmm1, xmm2;
871 for(
size_t i=ii; i<iend; ++i ) {
872 const IntrinsicType x1(
set( x[i] ) );
873 xmm1 = xmm1 + x1 * A.load(i,j );
874 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
877 y.store( j , y.load(j ) + xmm1 );
// --- 1 intrinsic vector per step --------------------------------------
885 for(
size_t i=ii; i<iend; ++i ) {
886 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
889 y.store( j, y.load(j) + xmm1 );
// --- scalar remainder: one column at a time, only if `remainder` ------
892 for( ; remainder && j<jend; ++j )
896 for(
size_t i=ii; i<iend; ++i ) {
897 value += x[i] * A(i,j);
// BLAS assignment kernel, fallback overload: guarded by
// DisableIf< UseBlasKernel<VT1,VT2,MT1> > and forwards to the large kernel.
922 template<
typename VT1
925 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
926 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
928 selectLargeAssignKernel( y, x, A );
// BLAS assignment kernel (enabled through EnableIf< UseBlasKernel<...> >).
// Triangular matrices go through trmv, with the lower/upper flag chosen from
// IsLower<MT1>; everything else goes through gemv with the scalar arguments
// ET(1) and ET(0).
// NOTE(review): trmv only receives y and A here, so y presumably has to hold
// a copy of x beforehand; that statement is not visible in this excerpt.
948 template<
typename VT1
951 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
952 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
956 if( IsTriangular<MT1>::value ) {
958 trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
961 gemv( y, x, A, ET(1), ET(0) );
// Assignment of the product expression to a sparse vector. The expression is
// first evaluated (via serial()) into a temporary of type ResultType.
// NOTE(review): the statement that transfers `tmp` into lhs is not visible in
// this excerpt.
981 template<
typename VT1 >
982 friend inline void assign( SparseVector<VT1,true>& lhs,
const TDVecDMatMultExpr& rhs )
992 const ResultType tmp(
serial( rhs ) );
// Addition assignment (lhs += vec * mat) to a dense vector.
1011 template<
typename VT1 >
1012 friend inline void addAssign( DenseVector<VT1,true>& lhs,
const TDVecDMatMultExpr& rhs )
// Special case: a matrix without rows or columns (the body of this branch is
// not visible in this excerpt).
1018 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Bind both operands through serial(); LT and RT are class-level typedefs.
1022 LT x(
serial( rhs.vec_ ) );
1023 RT A(
serial( rhs.mat_ ) );
// Hand over to the kernel dispatcher with the unwrapped target vector.
1030 TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
// Kernel dispatcher for y += x * A. The small kernel is chosen if the matrix
// is diagonal, if MT is a computation that is not evaluated, or if the matrix
// has fewer than TDVECDMATMULT_THRESHOLD elements; the other visible call
// goes to the BLAS kernel selector (the `else` keyword is on an omitted
// line).
1046 template<
typename VT1
1049 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1051 if( ( IsDiagonal<MT1>::value ) ||
1052 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1053 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1054 selectSmallAddAssignKernel( y, x, A );
1056 selectBlasAddAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for y += x * A. Every row i contributes
// x[i] * A(i,j) to y[j] over a column range narrowed by the structural traits
// of MT1.
// NOTE(review): some guards and closing braces are missing from this excerpt.
1075 template<
typename VT1
1078 static inline void selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1080 const size_t M( A.rows() );
1081 const size_t N( A.columns() );
1083 for(
size_t i=0UL; i<M; ++i )
1085 if( IsDiagonal<MT1>::value )
// Diagonal matrix: only the diagonal element of row i contributes.
1087 y[i] += x[i] * A(i,i);
// Column range of row i: starts at i (i+1 for strictly upper) for upper
// matrices, ends (exclusively) at i+1 (i for strictly lower) for lower ones.
1091 const size_t jbegin( ( IsUpper<MT1>::value )
1092 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1094 const size_t jend( ( IsLower<MT1>::value )
1095 ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
// jpos: end of the part that is processed two columns at a time.
1099 const size_t jnum( jend - jbegin );
1100 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
1102 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1103 y[j ] += x[i] * A(i,j );
1104 y[j+1UL] += x[i] * A(i,j+1UL);
// Left-over column of the two-way unrolled loop (its guard is not visible).
1107 y[jpos] += x[i] * A(i,jpos);
// Small-matrix addition assignment kernel, non-vectorized overload: guarded
// by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and forwards to the
// scalar default kernel.
1129 template<
typename VT1
1132 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1133 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1135 selectDefaultAddAssignKernel( y, x, A );
// Small-matrix addition assignment kernel, vectorized overload (enabled
// through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). The
// accumulators are seeded with the current content of y, the products are
// added on top and the result is stored back. Columns are handled in groups
// of 8, 4, 3, 2 and 1 intrinsic vectors, followed by a scalar remainder loop.
// NOTE(review): the column loop headers and most stores are on lines that
// are not part of this excerpt.
1154 template<
typename VT1
1157 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1158 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1160 typedef IntrinsicTrait<ElementType> IT;
1162 const size_t M( A.rows() );
1163 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
1165 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// End of the vectorized column range (N with the low bits cleared by the
// mask size_t(-IT::size) when a remainder pass is required).
1167 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
// --- 8 intrinsic vectors per step -------------------------------------
1174 const size_t ibegin( ( IsLower<MT1>::value )
1175 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1177 const size_t iend( ( IsUpper<MT1>::value )
1178 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
// Seed the accumulators with the existing values of y.
1182 IntrinsicType xmm1( y.load(j ) );
1183 IntrinsicType xmm2( y.load(j+
IT::size ) );
1184 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1185 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1186 IntrinsicType xmm5( y.load(j+
IT::size*4UL) );
1187 IntrinsicType xmm6( y.load(j+
IT::size*5UL) );
1188 IntrinsicType xmm7( y.load(j+
IT::size*6UL) );
1189 IntrinsicType xmm8( y.load(j+
IT::size*7UL) );
1191 for(
size_t i=ibegin; i<iend; ++i ) {
1192 const IntrinsicType x1(
set( x[i] ) );
1193 xmm1 = xmm1 + x1 * A.load(i,j );
1194 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1195 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1196 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1197 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1198 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1199 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1200 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
1203 y.store( j , xmm1 );
// --- 4 intrinsic vectors per step -------------------------------------
1215 const size_t ibegin( ( IsLower<MT1>::value )
1216 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1218 const size_t iend( ( IsUpper<MT1>::value )
1219 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1223 IntrinsicType xmm1( y.load(j ) );
1224 IntrinsicType xmm2( y.load(j+
IT::size ) );
1225 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1226 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1228 for(
size_t i=ibegin; i<iend; ++i ) {
1229 const IntrinsicType x1(
set( x[i] ) );
1230 xmm1 = xmm1 + x1 * A.load(i,j );
1231 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1232 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1233 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1236 y.store( j , xmm1 );
// --- 3 intrinsic vectors per step -------------------------------------
1244 const size_t ibegin( ( IsLower<MT1>::value )
1245 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1247 const size_t iend( ( IsUpper<MT1>::value )
1248 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1252 IntrinsicType xmm1( y.load(j ) );
1253 IntrinsicType xmm2( y.load(j+
IT::size ) );
1254 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1256 for(
size_t i=ibegin; i<iend; ++i ) {
1257 const IntrinsicType x1(
set( x[i] ) );
1258 xmm1 = xmm1 + x1 * A.load(i,j );
1259 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1260 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1263 y.store( j , xmm1 );
// --- 2 intrinsic vectors per step -------------------------------------
1270 const size_t ibegin( ( IsLower<MT1>::value )
1271 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1273 const size_t iend( ( IsUpper<MT1>::value )
1274 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1278 IntrinsicType xmm1( y.load(j ) );
1279 IntrinsicType xmm2( y.load(j+
IT::size) );
1281 for(
size_t i=ibegin; i<iend; ++i ) {
1282 const IntrinsicType x1(
set( x[i] ) );
1283 xmm1 = xmm1 + x1 * A.load(i,j );
1284 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
1287 y.store( j , xmm1 );
// --- 1 intrinsic vector per step --------------------------------------
1293 const size_t ibegin( ( IsLower<MT1>::value )
1294 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1296 const size_t iend( ( IsUpper<MT1>::value )
1297 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1301 IntrinsicType xmm1( y.load(j) );
1303 for(
size_t i=ibegin; i<iend; ++i ) {
1304 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// --- scalar remainder: one column at a time, only if `remainder` ------
1310 for( ; remainder && j<N; ++j )
1312 const size_t ibegin( ( IsLower<MT1>::value )
1313 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1315 const size_t iend( ( IsUpper<MT1>::value )
1316 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
// Dot product of x with column j, accumulated in `value`.
1322 for(
size_t i=ibegin; i<iend; ++i ) {
1323 value += x[i] * A(i,j);
// Large-matrix addition assignment kernel, non-vectorized overload: guarded
// by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and forwards to the
// scalar default kernel.
1346 template<
typename VT1
1349 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1350 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1352 selectDefaultAddAssignKernel( y, x, A );
// Large-matrix addition assignment kernel, vectorized overload (enabled
// through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). The matrix is
// processed in blocks of iblock rows by jblock columns; the partial sums of
// every block are added onto the current content of y.
// NOTE(review): the inner column loop headers and most stores are on lines
// that are not part of this excerpt.
1371 template<
typename VT1
1374 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1375 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1377 typedef IntrinsicTrait<ElementType> IT;
1379 const size_t M( A.rows() );
1380 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
1382 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// Block sizes: jblock is the number of elements that fit into 32768 bytes;
// iblock is 8 rows if the whole row is shorter than one column block,
// otherwise 4 rows.
1384 const size_t jblock( 32768UL /
sizeof( ElementType ) );
1385 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1389 for(
size_t jj=0U; jj<N; jj+=jblock ) {
1390 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Clip the current block to the matrix bounds.
1392 const size_t iend(
min( ii+iblock, M ) );
1393 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally limited to iend
// (iend-1 for strictly lower).
1394 const size_t jend( ( IsLower<MT1>::value )
1395 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// End of the vectorized column range.
1398 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
// For upper matrices the first column is the larger of jj and the masked
// row index ii (ii+1 for strictly upper).
1401 size_t j( ( IsUpper<MT1>::value )
1402 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 intrinsic vectors per step -------------------------------------
1407 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1409 for(
size_t i=ii; i<iend; ++i ) {
1410 const IntrinsicType x1(
set( x[i] ) );
1411 xmm1 = xmm1 + x1 * A.load(i,j );
1412 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1413 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1414 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1415 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1416 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1417 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1418 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// The block result is added to what y already holds at this position.
1421 y.store( j , y.load(j ) + xmm1 );
// --- 4 intrinsic vectors per step -------------------------------------
1433 IntrinsicType xmm1, xmm2, xmm3, xmm4;
1435 for(
size_t i=ii; i<iend; ++i ) {
1436 const IntrinsicType x1(
set( x[i] ) );
1437 xmm1 = xmm1 + x1 * A.load(i,j );
1438 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1439 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1440 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1443 y.store( j , y.load(j ) + xmm1 );
// --- 3 intrinsic vectors per step -------------------------------------
1451 IntrinsicType xmm1, xmm2, xmm3;
1453 for(
size_t i=ii; i<iend; ++i ) {
1454 const IntrinsicType x1(
set( x[i] ) );
1455 xmm1 = xmm1 + x1 * A.load(i,j );
1456 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1457 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1460 y.store( j , y.load(j ) + xmm1 );
// --- 2 intrinsic vectors per step -------------------------------------
1467 IntrinsicType xmm1, xmm2;
1469 for(
size_t i=ii; i<iend; ++i ) {
1470 const IntrinsicType x1(
set( x[i] ) );
1471 xmm1 = xmm1 + x1 * A.load(i,j );
1472 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
1475 y.store( j , y.load(j ) + xmm1 );
// --- 1 intrinsic vector per step --------------------------------------
1483 for(
size_t i=ii; i<iend; ++i ) {
1484 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
1487 y.store( j, y.load(j) + xmm1 );
// --- scalar remainder: one column at a time, only if `remainder` ------
1490 for( ; remainder && j<jend; ++j )
1494 for(
size_t i=ii; i<iend; ++i ) {
1495 value += x[i] * A(i,j);
// BLAS addition assignment kernel, fallback overload: guarded by
// DisableIf< UseBlasKernel<VT1,VT2,MT1> > and forwards to the large kernel.
1520 template<
typename VT1
1523 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
1524 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1526 selectLargeAddAssignKernel( y, x, A );
// BLAS addition assignment kernel (enabled through
// EnableIf< UseBlasKernel<...> >). Triangular matrices are multiplied into a
// temporary with trmv and the temporary is then added to y; everything else
// goes through gemv with the scalar arguments ET(1) and ET(1).
// NOTE(review): the declaration of `tmp` is not visible in this excerpt.
1546 template<
typename VT1
1549 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
1550 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1554 if( IsTriangular<MT1>::value ) {
1556 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1557 addAssign( y, tmp );
1560 gemv( y, x, A, ET(1), ET(1) );
// Subtraction assignment (lhs -= vec * mat) to a dense vector.
1584 template<
typename VT1 >
1585 friend inline void subAssign( DenseVector<VT1,true>& lhs,
const TDVecDMatMultExpr& rhs )
// Special case: a matrix without rows or columns (the body of this branch is
// not visible in this excerpt).
1591 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Bind both operands through serial(); LT and RT are class-level typedefs.
1595 LT x(
serial( rhs.vec_ ) );
1596 RT A(
serial( rhs.mat_ ) );
// Hand over to the kernel dispatcher with the unwrapped target vector.
1603 TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
// Kernel dispatcher for y -= x * A. The small kernel is chosen if the matrix
// is diagonal, if MT is a computation that is not evaluated, or if the matrix
// has fewer than TDVECDMATMULT_THRESHOLD elements; the other visible call
// goes to the BLAS kernel selector (the `else` keyword is on an omitted
// line).
1619 template<
typename VT1
1622 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1624 if( ( IsDiagonal<MT1>::value ) ||
1625 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1626 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1627 selectSmallSubAssignKernel( y, x, A );
1629 selectBlasSubAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for y -= x * A. Every row i subtracts
// x[i] * A(i,j) from y[j] over a column range narrowed by the structural
// traits of MT1.
// NOTE(review): some guards and closing braces are missing from this excerpt.
1648 template<
typename VT1
1651 static inline void selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1653 const size_t M( A.rows() );
1654 const size_t N( A.columns() );
1656 for(
size_t i=0UL; i<M; ++i )
1658 if( IsDiagonal<MT1>::value )
// Diagonal matrix: only the diagonal element of row i contributes.
1660 y[i] -= x[i] * A(i,i);
// Column range of row i: starts at i (i+1 for strictly upper) for upper
// matrices, ends (exclusively) at i+1 (i for strictly lower) for lower ones.
1664 const size_t jbegin( ( IsUpper<MT1>::value )
1665 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1667 const size_t jend( ( IsLower<MT1>::value )
1668 ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
// jpos: end of the part that is processed two columns at a time.
1672 const size_t jnum( jend - jbegin );
1673 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
1675 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1676 y[j ] -= x[i] * A(i,j );
1677 y[j+1UL] -= x[i] * A(i,j+1UL);
// Left-over column of the two-way unrolled loop (its guard is not visible).
1680 y[jpos] -= x[i] * A(i,jpos);
// Small-matrix subtraction assignment kernel, non-vectorized overload:
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and
// forwards to the scalar default kernel.
1702 template<
typename VT1
1705 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1706 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1708 selectDefaultSubAssignKernel( y, x, A );
// Small-matrix subtraction assignment kernel, vectorized overload (enabled
// through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). The
// accumulators are seeded with the current content of y, the products are
// subtracted and the result is stored back. Columns are handled in groups of
// 8, 4, 3, 2 and 1 intrinsic vectors, followed by a scalar remainder loop.
// NOTE(review): the column loop headers and most stores are on lines that
// are not part of this excerpt.
1728 template<
typename VT1
1731 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1732 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1734 typedef IntrinsicTrait<ElementType> IT;
1736 const size_t M( A.rows() );
1737 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
1739 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// End of the vectorized column range (N with the low bits cleared by the
// mask size_t(-IT::size) when a remainder pass is required).
1741 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
// --- 8 intrinsic vectors per step -------------------------------------
1748 const size_t ibegin( ( IsLower<MT1>::value )
1749 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1751 const size_t iend( ( IsUpper<MT1>::value )
1752 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
// Seed the accumulators with the existing values of y.
1756 IntrinsicType xmm1( y.load(j ) );
1757 IntrinsicType xmm2( y.load(j+
IT::size ) );
1758 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1759 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1760 IntrinsicType xmm5( y.load(j+
IT::size*4UL) );
1761 IntrinsicType xmm6( y.load(j+
IT::size*5UL) );
1762 IntrinsicType xmm7( y.load(j+
IT::size*6UL) );
1763 IntrinsicType xmm8( y.load(j+
IT::size*7UL) );
1765 for(
size_t i=ibegin; i<iend; ++i ) {
1766 const IntrinsicType x1(
set( x[i] ) );
1767 xmm1 = xmm1 - x1 * A.load(i,j );
1768 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1769 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1770 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1771 xmm5 = xmm5 - x1 * A.load(i,j+
IT::size*4UL);
1772 xmm6 = xmm6 - x1 * A.load(i,j+
IT::size*5UL);
1773 xmm7 = xmm7 - x1 * A.load(i,j+
IT::size*6UL);
1774 xmm8 = xmm8 - x1 * A.load(i,j+
IT::size*7UL);
1777 y.store( j , xmm1 );
// --- 4 intrinsic vectors per step -------------------------------------
1789 const size_t ibegin( ( IsLower<MT1>::value )
1790 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1792 const size_t iend( ( IsUpper<MT1>::value )
1793 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1797 IntrinsicType xmm1( y.load(j ) );
1798 IntrinsicType xmm2( y.load(j+
IT::size ) );
1799 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1800 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1802 for(
size_t i=ibegin; i<iend; ++i ) {
1803 const IntrinsicType x1(
set( x[i] ) );
1804 xmm1 = xmm1 - x1 * A.load(i,j );
1805 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1806 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1807 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1810 y.store( j , xmm1 );
// --- 3 intrinsic vectors per step -------------------------------------
1818 const size_t ibegin( ( IsLower<MT1>::value )
1819 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1821 const size_t iend( ( IsUpper<MT1>::value )
1822 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1826 IntrinsicType xmm1( y.load(j ) );
1827 IntrinsicType xmm2( y.load(j+
IT::size ) );
1828 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1830 for(
size_t i=ibegin; i<iend; ++i ) {
1831 const IntrinsicType x1(
set( x[i] ) );
1832 xmm1 = xmm1 - x1 * A.load(i,j );
1833 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1834 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1837 y.store( j , xmm1 );
// --- 2 intrinsic vectors per step -------------------------------------
1844 const size_t ibegin( ( IsLower<MT1>::value )
1845 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1847 const size_t iend( ( IsUpper<MT1>::value )
1848 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1852 IntrinsicType xmm1( y.load(j ) );
1853 IntrinsicType xmm2( y.load(j+
IT::size) );
1855 for(
size_t i=ibegin; i<iend; ++i ) {
1856 const IntrinsicType x1(
set( x[i] ) );
1857 xmm1 = xmm1 - x1 * A.load(i,j );
1858 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size);
1861 y.store( j , xmm1 );
// --- 1 intrinsic vector per step --------------------------------------
1867 const size_t ibegin( ( IsLower<MT1>::value )
1868 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1870 const size_t iend( ( IsUpper<MT1>::value )
1871 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1875 IntrinsicType xmm1( y.load(j) );
1877 for(
size_t i=ibegin; i<iend; ++i ) {
1878 xmm1 = xmm1 -
set( x[i] ) * A.load(i,j);
// --- scalar remainder: one column at a time, only if `remainder` ------
1884 for( ; remainder && j<N; ++j )
1886 const size_t ibegin( ( IsLower<MT1>::value )
1887 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1889 const size_t iend( ( IsUpper<MT1>::value )
1890 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
// Dot product of x with column j, accumulated (with '+=') in `value`; how
// `value` is applied to y is not visible in this excerpt.
1896 for(
size_t i=ibegin; i<iend; ++i ) {
1897 value += x[i] * A(i,j);
// Large-matrix subtraction assignment kernel, non-vectorized overload:
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> > and
// forwards to the scalar default kernel.
1920 template<
typename VT1
1923 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1924 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1926 selectDefaultSubAssignKernel( y, x, A );
// Large-matrix subtraction assignment kernel, vectorized overload (enabled
// through EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >). The matrix is
// processed in blocks of iblock rows by jblock columns; the partial sums of
// every block are accumulated with '+' and then subtracted from the current
// content of y.
// NOTE(review): the inner column loop headers and most stores are on lines
// that are not part of this excerpt.
1946 template<
typename VT1
1949 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1950 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1952 typedef IntrinsicTrait<ElementType> IT;
1954 const size_t M( A.rows() );
1955 const size_t N( A.columns() );
// A scalar remainder pass is needed unless both y and A are padded.
1957 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
// Block sizes: jblock is the number of elements that fit into 32768 bytes;
// iblock is 8 rows if the whole row is shorter than one column block,
// otherwise 4 rows.
1959 const size_t jblock( 32768UL /
sizeof( ElementType ) );
1960 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1964 for(
size_t jj=0U; jj<N; jj+=jblock ) {
1965 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Clip the current block to the matrix bounds.
1967 const size_t iend(
min( ii+iblock, M ) );
1968 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally limited to iend
// (iend-1 for strictly lower).
1969 const size_t jend( ( IsLower<MT1>::value )
1970 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// End of the vectorized column range.
1973 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
// For upper matrices the first column is the larger of jj and the masked
// row index ii (ii+1 for strictly upper).
1976 size_t j( ( IsUpper<MT1>::value )
1977 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 intrinsic vectors per step -------------------------------------
1982 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1984 for(
size_t i=ii; i<iend; ++i ) {
1985 const IntrinsicType x1(
set( x[i] ) );
1986 xmm1 = xmm1 + x1 * A.load(i,j );
1987 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1988 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1989 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1990 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1991 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1992 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1993 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// The block result is subtracted from what y already holds at this position.
1996 y.store( j , y.load(j ) - xmm1 );
// --- 4 intrinsic vectors per step -------------------------------------
2008 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2010 for(
size_t i=ii; i<iend; ++i ) {
2011 const IntrinsicType x1(
set( x[i] ) );
2012 xmm1 = xmm1 + x1 * A.load(i,j );
2013 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2014 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2015 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2018 y.store( j , y.load(j ) - xmm1 );
// --- 3 intrinsic vectors per step -------------------------------------
2026 IntrinsicType xmm1, xmm2, xmm3;
2028 for(
size_t i=ii; i<iend; ++i ) {
2029 const IntrinsicType x1(
set( x[i] ) );
2030 xmm1 = xmm1 + x1 * A.load(i,j );
2031 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2032 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2035 y.store( j , y.load(j ) - xmm1 );
// --- 2 intrinsic vectors per step -------------------------------------
2042 IntrinsicType xmm1, xmm2;
2044 for(
size_t i=ii; i<iend; ++i ) {
2045 const IntrinsicType x1(
set( x[i] ) );
2046 xmm1 = xmm1 + x1 * A.load(i,j );
2047 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
2050 y.store( j , y.load(j ) - xmm1 );
// --- 1 intrinsic vector per step --------------------------------------
2058 for(
size_t i=ii; i<iend; ++i ) {
2059 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2062 y.store( j, y.load(j) - xmm1 );
// --- scalar remainder: one column at a time, only if `remainder` ------
2065 for( ; remainder && j<jend; ++j )
2069 for(
size_t i=ii; i<iend; ++i ) {
2070 value += x[i] * A(i,j);
// BLAS subtraction assignment kernel, fallback overload: guarded by
// DisableIf< UseBlasKernel<VT1,VT2,MT1> > and forwards to the large kernel.
2095 template<
typename VT1
2098 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
2099 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2101 selectLargeSubAssignKernel( y, x, A );
// BLAS subtraction assignment kernel (enabled through
// EnableIf< UseBlasKernel<...> >). Triangular matrices are multiplied into a
// temporary with trmv and the temporary is then subtracted from y; everything
// else goes through gemv with the scalar arguments ET(-1) and ET(1).
// NOTE(review): the declaration of `tmp` is not visible in this excerpt.
2121 template<
typename VT1
2124 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
2125 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2129 if( IsTriangular<MT1>::value ) {
2131 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2132 subAssign( y, tmp );
2135 gemv( y, x, A, ET(-1), ET(1) );
// Multiplication assignment to a dense vector. The product expression is
// first evaluated (via serial()) into a temporary of type ResultType, which
// is then passed to multAssign together with the unwrapped target vector.
2159 template<
typename VT1 >
2160 friend inline void multAssign( DenseVector<VT1,true>& lhs,
const TDVecDMatMultExpr& rhs )
2170 const ResultType tmp(
serial( rhs ) );
2171 multAssign( ~lhs, tmp );
2195 template<
typename VT1 >
2196 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2203 if( rhs.mat_.rows() == 0UL ) {
2207 else if( rhs.mat_.columns() == 0UL ) {
2239 template<
typename VT1 >
2240 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2251 const ResultType tmp( rhs );
2272 template<
typename VT1 >
2273 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2280 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2316 template<
typename VT1 >
2317 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2324 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2360 template<
typename VT1 >
2361 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2372 const ResultType tmp( rhs );
// Head of the DVecScalarMultExpr specialization for a scaled
// TDVecDMatMultExpr<VT,MT> (scalar type ST). It derives publicly from
// DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
// and privately from VecScalarMultExpr and Computation.
// NOTE(review): the `class DVecScalarMultExpr< ... >` line and the remaining
// template parameters are not part of this excerpt.
2411 template<
typename VT
2415 :
public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
2416 ,
private VecScalarMultExpr
2417 ,
private Computation
// Shorthand for the wrapped vector/matrix multiplication expression.
2421 typedef TDVecDMatMultExpr<VT,MT> VMM;
// The vector operand is evaluated if it is a computation or requires
// evaluation.
2433 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// The matrix operand is evaluated if it requires evaluation, or if it is a
// computation whose element type MET equals VET and is BLAS compatible.
2438 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2439 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Helper trait: `value` is set whenever the vector or the matrix operand has
// to be evaluated (evaluateVector || evaluateMatrix). The template parameter
// T1 does not take part in the visible expression.
2447 template<
typename T1 >
2448 struct UseSMPAssign {
2449 enum { value = ( evaluateVector || evaluateMatrix ) };
// Helper trait deciding whether a BLAS kernel can be used for the scaled
// product. Compared with the three-parameter trait of TDVecDMatMultExpr it
// takes a fourth type T4 and adds one condition on it (last line). All
// visible conditions are AND-ed.
// NOTE(review): T4 is presumably the scalar type - confirm at the call
// sites; the line opening the enum is not part of this excerpt.
2457 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2458 struct UseBlasKernel {
// Target must expose mutable data access, both operands const data access.
2460 HasMutableDataAccess<T1>::value &&
2461 HasConstDataAccess<T2>::value &&
2462 HasConstDataAccess<T3>::value &&
// Diagonal matrices are excluded.
2463 !IsDiagonal<T3>::value &&
2464 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Every element type has to be BLAS compatible ...
2465 IsBlasCompatible<typename T1::ElementType>::value &&
2466 IsBlasCompatible<typename T2::ElementType>::value &&
2467 IsBlasCompatible<typename T3::ElementType>::value &&
// ... and all three element types have to be identical.
2468 IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
2469 IsSame< typename T1::ElementType, typename T3::ElementType >::value &&
// Excluded: builtin element type of T1 combined with a complex T4.
2470 !( IsBuiltin<typename T1::ElementType>::value && IsComplex<T4>::value ) };
// Helper trait deciding whether the intrinsics-based default kernels can be
// used for the scaled product. In addition to the conditions on T1..T3 the
// type T4 must be identical to the element type of T1. All visible
// conditions are AND-ed.
// NOTE(review): T4 is presumably the scalar type - confirm at the call
// sites; the line opening the enum is not part of this excerpt.
2479 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2480 struct UseVectorizedDefaultKernel {
// Diagonal matrices are excluded.
2482 !IsDiagonal<T3>::value &&
2483 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// All element types, including T4, must be identical.
2484 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2485 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2486 IsSame<typename T1::ElementType,T4>::value &&
// The element type must support intrinsic addition and multiplication.
2487 IntrinsicTrait<typename T1::ElementType>::addition &&
2488 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression specialization.
2494 typedef DVecScalarMultExpr<VMM,ST,true>
This;
// Result type, obtained from MultTrait of RES and the scalar type ST.
2495 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic type that corresponds to ElementType.
2498 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// Type of the left-hand operand: the wrapped vector/matrix product.
2503 typedef const TDVecDMatMultExpr<VT,MT>
LeftOperand;
// Operand types used inside the kernels: const VRT / const MRT if the
// respective operand has to be evaluated, otherwise VCT / MCT.
2509 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
LT;
2512 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
RT;
// Vectorization flag: requires a non-diagonal matrix, vectorizable operands,
// identical vector/matrix/scalar element types and intrinsic addition and
// multiplication for VET.
2517 enum { vectorizable = !IsDiagonal<MT>::value &&
2518 VT::vectorizable && MT::vectorizable &&
2519 IsSame<VET,MET>::value &&
2520 IsSame<VET,ST>::value &&
2521 IntrinsicTrait<VET>::addition &&
2522 IntrinsicTrait<VET>::multiplication };
// SMP flag: set only if neither operand requires an intermediate evaluation
// and both operand types report smpAssignable themselves.
2525 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2526 !evaluateMatrix && MT::smpAssignable };
// Constructor taking the vector/matrix product expression and the scalar.
// NOTE(review): the initializer list and body are not visible in this
// excerpt; presumably they initialize vector_ and scalar_ - confirm.
2535 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
// Subscript operator: returns element `index` of the wrapped vector/matrix
// product multiplied by the stored scalar. No bounds check is visible here.
2547 inline ReturnType
operator[](
size_t index )
const {
2549 return vector_[index] * scalar_;
// Checked element access: compares `index` against vector_.size() and
// otherwise delegates to operator[].
2560 inline ReturnType
at(
size_t index )
const {
2561 if( index >= vector_.size() ) {
// NOTE(review): the body of the out-of-range branch is elided from this
// listing; what it does for an invalid index cannot be confirmed from here.
2564 return (*
this)[index];
2573 inline size_t size()
const {
2574 return vector_.size();
// Alias query: forwards the question unchanged to the wrapped
// vector/matrix multiplication expression (vector_).
2604 template<
typename T >
2605 inline bool canAlias(
const T* alias )
const {
2606 return vector_.canAlias( alias );
// Alias query: forwards the question unchanged to the wrapped
// vector/matrix multiplication expression (vector_).
2616 template<
typename T >
2617 inline bool isAliased(
const T* alias )
const {
2618 return vector_.isAliased( alias );
2628 return vector_.isAligned();
2638 typename VMM::RightOperand A( vector_.rightOperand() );
2640 ( IsComputation<MT>::value && !evaluateMatrix ) ||
2641 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
2642 (
size() > SMP_TDVECDMATMULT_THRESHOLD );
2648 LeftOperand vector_;
2649 RightOperand scalar_;
2664 template<
typename VT1 >
2665 friend inline void assign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2671 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2672 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2674 if( right.rows() == 0UL ) {
2678 else if( right.columns() == 0UL ) {
2690 DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel dispatch for y = (x * A) * scalar.
// The small kernel is chosen when the matrix type is diagonal, when MT is a
// computation that is not evaluated (IsComputation<MT> && !evaluateMatrix), or
// when the element count rows*columns is below TDVECDMATMULT_THRESHOLD.
// NOTE(review): the remaining template parameters and the line between the two
// calls (presumably `else`) are elided from this listing.
2705 template<
typename VT1
2709 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2711 if( ( IsDiagonal<MT1>::value ) ||
2712 ( IsComputation<MT>::value && !evaluateMatrix ) ||
2713 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2714 selectSmallAssignKernel( y, x, A, scalar );
// Otherwise hand over to the BLAS selection stage.
2716 selectBlasAssignKernel( y, x, A, scalar );
// Default (non-vectorized) kernel for y = (x * A) * scalar.
// NOTE(review): several lines of this function (branch bodies, else keywords,
// closing braces, remaining template parameters) are elided from this listing;
// the comments below describe only what the visible lines do.
2734 template<
typename VT1
2738 static inline void selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2740 const size_t M( A.rows() );
2741 const size_t N( A.columns() );
// Special case for strictly upper matrices (branch body not visible).
2743 if( IsStrictlyUpper<MT1>::value ) {
// Seed pass over row 0 for non-lower matrices: y[j] is overwritten (not
// accumulated) with x[0]*A(0,j); column 0 is skipped for strictly upper types.
2747 if( !IsLower<MT1>::value )
2749 for(
size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<N; ++j ) {
2750 y[j] = x[0UL] * A(0UL,j);
// Row loop: starts at row 0 only for lower, non-strictly-lower matrix types,
// otherwise at row 1.
2754 for(
size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
// Diagonal matrices: a single product per element, scaled right away.
2756 if( IsDiagonal<MT1>::value )
2758 y[i] = x[i] * A(i,i) * scalar;
// Column range touched by row i; the alternatives of both conditional
// expressions are on elided lines.
2762 const size_t jbegin( ( IsUpper<MT1>::value )
2763 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2765 const size_t jend( ( IsLower<MT1>::value )
2766 ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
// jpos = jbegin plus jnum rounded down to an even count, so that the loop
// below can process the columns in pairs.
2770 const size_t jnum( jend - jbegin );
2771 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
2773 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
2774 y[j ] += x[i] * A(i,j );
2775 y[j+1UL] += x[i] * A(i,j+1UL);
// Leftover column after the paired loop (its guarding condition is elided).
2778 y[jpos] += x[i] * A(i,jpos);
// For lower matrices the element at jend is assigned, not accumulated.
2780 if( IsLower<MT1>::value ) {
2781 y[jend] = x[i] * A(i,jend);
// Special case for strictly lower matrices (branch body not visible).
2786 if( IsStrictlyLower<MT1>::value ) {
// Final pass over the columns for non-diagonal matrices. The loop body is
// elided; presumably it applies `scalar`, since the accumulation above does
// not -- TODO confirm against the full source.
2790 if( !IsDiagonal<MT1>::value )
2792 const size_t iend( IsStrictlyLower<MT1>::value ? N-1UL : N );
2793 for(
size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<iend; ++j ) {
// Small-matrix assignment kernel, overload gated by DisableIf on
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> (presumably active when the
// vectorized kernel is not applicable). It only forwards all arguments to
// selectDefaultAssignKernel.
2814 template<
typename VT1
2818 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2819 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2821 selectDefaultAssignKernel( y, x, A, scalar );
2839 template<
typename VT1
2843 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2844 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2846 typedef IntrinsicTrait<ElementType> IT;
2848 const size_t M( A.rows() );
2849 const size_t N( A.columns() );
2851 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
2853 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
2856 const IntrinsicType factor(
set( scalar ) );
2862 const size_t ibegin( ( IsLower<MT1>::value )
2863 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2865 const size_t iend( ( IsUpper<MT1>::value )
2866 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2870 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2872 for(
size_t i=ibegin; i<iend; ++i ) {
2873 const IntrinsicType x1(
set( x[i] ) );
2874 xmm1 = xmm1 + x1 * A.load(i,j );
2875 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2876 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2877 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2878 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
2879 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
2880 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
2881 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
2884 y.store( j , xmm1*factor );
2885 y.store( j+
IT::size , xmm2*factor );
2886 y.store( j+
IT::size*2UL, xmm3*factor );
2887 y.store( j+
IT::size*3UL, xmm4*factor );
2888 y.store( j+
IT::size*4UL, xmm5*factor );
2889 y.store( j+
IT::size*5UL, xmm6*factor );
2890 y.store( j+
IT::size*6UL, xmm7*factor );
2891 y.store( j+
IT::size*7UL, xmm8*factor );
2896 const size_t ibegin( ( IsLower<MT1>::value )
2897 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2899 const size_t iend( ( IsUpper<MT1>::value )
2900 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2904 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2906 for(
size_t i=ibegin; i<iend; ++i ) {
2907 const IntrinsicType x1(
set( x[i] ) );
2908 xmm1 = xmm1 + x1 * A.load(i,j );
2909 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2910 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2911 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2914 y.store( j , xmm1*factor );
2915 y.store( j+
IT::size , xmm2*factor );
2916 y.store( j+
IT::size*2UL, xmm3*factor );
2917 y.store( j+
IT::size*3UL, xmm4*factor );
2922 const size_t ibegin( ( IsLower<MT1>::value )
2923 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2925 const size_t iend( ( IsUpper<MT1>::value )
2926 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2930 IntrinsicType xmm1, xmm2, xmm3;
2932 for(
size_t i=ibegin; i<iend; ++i ) {
2933 const IntrinsicType x1(
set( x[i] ) );
2934 xmm1 = xmm1 + x1 * A.load(i,j );
2935 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2936 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2939 y.store( j , xmm1*factor );
2940 y.store( j+
IT::size , xmm2*factor );
2941 y.store( j+
IT::size*2UL, xmm3*factor );
2946 const size_t ibegin( ( IsLower<MT1>::value )
2947 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2949 const size_t iend( ( IsUpper<MT1>::value )
2950 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2954 IntrinsicType xmm1, xmm2;
2956 for(
size_t i=ibegin; i<iend; ++i ) {
2957 const IntrinsicType x1(
set( x[i] ) );
2958 xmm1 = xmm1 + x1 * A.load(i,j );
2959 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
2962 y.store( j , xmm1*factor );
2963 y.store( j+
IT::size, xmm2*factor );
2968 const size_t ibegin( ( IsLower<MT1>::value )
2969 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2971 const size_t iend( ( IsUpper<MT1>::value )
2972 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2978 for(
size_t i=ibegin; i<iend; ++i ) {
2979 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2982 y.store( j, xmm1*factor );
2985 for( ; remainder && j<N; ++j )
2987 const size_t ibegin( ( IsLower<MT1>::value )
2988 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2990 const size_t iend( ( IsUpper<MT1>::value )
2991 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2997 for(
size_t i=ibegin; i<iend; ++i ) {
2998 value += x[i] * A(i,j);
3001 y[j] = value * scalar;
3020 template<
typename VT1
3024 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3025 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3027 selectDefaultAssignKernel( y, x, A, scalar );
3045 template<
typename VT1
3049 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3050 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3052 typedef IntrinsicTrait<ElementType> IT;
3054 const size_t M( A.rows() );
3055 const size_t N( A.columns() );
3057 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3059 const size_t jblock( 32768UL /
sizeof( ElementType ) );
3060 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3062 const IntrinsicType factor(
set( scalar ) );
3068 for(
size_t jj=0U; jj<N; jj+=jblock ) {
3069 for(
size_t ii=0UL; ii<M; ii+=iblock )
3071 const size_t iend(
min( ii+iblock, M ) );
3072 const size_t jtmp(
min( jj+jblock, N ) );
3073 const size_t jend( ( IsLower<MT1>::value )
3074 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3077 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
3080 size_t j( ( IsUpper<MT1>::value )
3081 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
3086 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3088 for(
size_t i=ii; i<iend; ++i ) {
3089 const IntrinsicType x1(
set( x[i] ) );
3090 xmm1 = xmm1 + x1 * A.load(i,j );
3091 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3092 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3093 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3094 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3095 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3096 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3097 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
3100 y.store( j , y.load(j ) + xmm1*factor );
3112 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3114 for(
size_t i=ii; i<iend; ++i ) {
3115 const IntrinsicType x1(
set( x[i] ) );
3116 xmm1 = xmm1 + x1 * A.load(i,j );
3117 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3118 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3119 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3122 y.store( j , y.load(j ) + xmm1*factor );
3130 IntrinsicType xmm1, xmm2, xmm3;
3132 for(
size_t i=ii; i<iend; ++i ) {
3133 const IntrinsicType x1(
set( x[i] ) );
3134 xmm1 = xmm1 + x1 * A.load(i,j );
3135 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3136 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3139 y.store( j , y.load(j ) + xmm1*factor );
3146 IntrinsicType xmm1, xmm2;
3148 for(
size_t i=ii; i<iend; ++i ) {
3149 const IntrinsicType x1(
set( x[i] ) );
3150 xmm1 = xmm1 + x1 * A.load(i,j );
3151 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3154 y.store( j , y.load(j ) + xmm1*factor );
3162 for(
size_t i=ii; i<iend; ++i ) {
3163 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3166 y.store( j, y.load(j) + xmm1*factor );
3169 for( ; remainder && j<jend; ++j )
3173 for(
size_t i=ii; i<iend; ++i ) {
3174 value += x[i] * A(i,j);
3177 y[j] += value * scalar;
// BLAS-stage assignment kernel, overload gated by DisableIf on
// UseBlasKernel<VT1,VT2,MT1,ST2> (presumably active when the BLAS kernel is
// not applicable). It only forwards all arguments to selectLargeAssignKernel.
3197 template<
typename VT1
3201 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3202 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3204 selectLargeAssignKernel( y, x, A, scalar );
// BLAS-based assignment kernel for y = (x * A) * scalar, overload gated by
// EnableIf on UseBlasKernel<VT1,VT2,MT1,ST2>.
// NOTE(review): the definition of ET and the line between the two branches
// (presumably `else`) are elided from this listing.
3223 template<
typename VT1
3227 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3228 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: y is first set to scalar * x, then trmv is applied to
// y with the lower/upper flag chosen from IsLower<MT1>.
3232 if( IsTriangular<MT1>::value ) {
3233 assign( y, scalar * x );
3234 trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// General matrices: a single gemv call with ET(scalar) and ET(0) as trailing
// arguments (presumably the BLAS alpha/beta factors, so y is overwritten --
// confirm against the gemv wrapper).
3237 gemv( y, x, A, ET(scalar), ET(0) );
// Assignment of the scaled vector/matrix product to a sparse row vector:
// the expression is evaluated via serial() into a temporary of ResultType,
// which is then assigned to the target.
3255 template<
typename VT1 >
3256 friend inline void assign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3266 const ResultType tmp(
serial( rhs ) );
3267 assign( ~lhs, tmp );
3283 template<
typename VT1 >
3284 friend inline void addAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3290 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3291 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3293 if( right.rows() == 0UL || right.columns() == 0UL ) {
3305 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel dispatch for y += (x * A) * scalar.
// The small kernel is chosen when the matrix type is diagonal, when MT is a
// computation that is not evaluated (IsComputation<MT> && !evaluateMatrix), or
// when the element count rows*columns is below TDVECDMATMULT_THRESHOLD.
// NOTE(review): the remaining template parameters and the line between the two
// calls (presumably `else`) are elided from this listing.
3320 template<
typename VT1
3324 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3326 if( ( IsDiagonal<MT1>::value ) ||
3327 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3328 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3329 selectSmallAddAssignKernel( y, x, A, scalar );
// Otherwise hand over to the BLAS selection stage.
3331 selectBlasAddAssignKernel( y, x, A, scalar );
// Default addition assignment kernel: rebuilds the expression x * A * scalar
// and passes it to the addAssign member of the target vector y.
3349 template<
typename VT1
3353 static inline void selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3355 y.addAssign( x * A * scalar );
3373 template<
typename VT1
3377 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3378 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3380 selectDefaultAddAssignKernel( y, x, A, scalar );
3399 template<
typename VT1
3403 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3404 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3406 typedef IntrinsicTrait<ElementType> IT;
3408 const size_t M( A.rows() );
3409 const size_t N( A.columns() );
3411 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3413 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
3416 const IntrinsicType factor(
set( scalar ) );
3422 const size_t ibegin( ( IsLower<MT1>::value )
3423 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3425 const size_t iend( ( IsUpper<MT1>::value )
3426 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3430 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3432 for(
size_t i=ibegin; i<iend; ++i ) {
3433 const IntrinsicType x1(
set( x[i] ) );
3434 xmm1 = xmm1 + x1 * A.load(i,j );
3435 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3436 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3437 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3438 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3439 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3440 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3441 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
3444 y.store( j , y.load(j ) + xmm1*factor );
3456 const size_t ibegin( ( IsLower<MT1>::value )
3457 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3459 const size_t iend( ( IsUpper<MT1>::value )
3460 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3464 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3466 for(
size_t i=ibegin; i<iend; ++i ) {
3467 const IntrinsicType x1(
set( x[i] ) );
3468 xmm1 = xmm1 + x1 * A.load(i,j );
3469 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3470 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3471 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3474 y.store( j , y.load(j ) + xmm1*factor );
3482 const size_t ibegin( ( IsLower<MT1>::value )
3483 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3485 const size_t iend( ( IsUpper<MT1>::value )
3486 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3490 IntrinsicType xmm1, xmm2, xmm3;
3492 for(
size_t i=ibegin; i<iend; ++i ) {
3493 const IntrinsicType x1(
set( x[i] ) );
3494 xmm1 = xmm1 + x1 * A.load(i,j );
3495 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3496 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3499 y.store( j , y.load(j ) + xmm1*factor );
3506 const size_t ibegin( ( IsLower<MT1>::value )
3507 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3509 const size_t iend( ( IsUpper<MT1>::value )
3510 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3514 IntrinsicType xmm1, xmm2;
3516 for(
size_t i=ibegin; i<iend; ++i ) {
3517 const IntrinsicType x1(
set( x[i] ) );
3518 xmm1 = xmm1 + x1 * A.load(i,j );
3519 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3522 y.store( j , y.load(j ) + xmm1*factor );
3528 const size_t ibegin( ( IsLower<MT1>::value )
3529 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3531 const size_t iend( ( IsUpper<MT1>::value )
3532 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3538 for(
size_t i=ibegin; i<iend; ++i ) {
3539 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3542 y.store( j, y.load(j) + xmm1*factor );
3545 for( ; remainder && j<N; ++j )
3547 const size_t ibegin( ( IsLower<MT1>::value )
3548 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3550 const size_t iend( ( IsUpper<MT1>::value )
3551 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3557 for(
size_t i=ibegin; i<iend; ++i ) {
3558 value += x[i] * A(i,j);
3561 y[j] += value * scalar;
3580 template<
typename VT1
3584 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3585 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3587 selectDefaultAddAssignKernel( y, x, A, scalar );
3606 template<
typename VT1
3610 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3611 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3613 typedef IntrinsicTrait<ElementType> IT;
3615 const size_t M( A.rows() );
3616 const size_t N( A.columns() );
3618 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3620 const size_t jblock( 32768UL /
sizeof( ElementType ) );
3621 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3623 const IntrinsicType factor(
set( scalar ) );
3627 for(
size_t jj=0U; jj<N; jj+=jblock ) {
3628 for(
size_t ii=0UL; ii<M; ii+=iblock )
3630 const size_t iend(
min( ii+iblock, M ) );
3631 const size_t jtmp(
min( jj+jblock, N ) );
3632 const size_t jend( ( IsLower<MT1>::value )
3633 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3636 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
3639 size_t j( ( IsUpper<MT1>::value )
3640 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
3645 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3647 for(
size_t i=ii; i<iend; ++i ) {
3648 const IntrinsicType x1(
set( x[i] ) );
3649 xmm1 = xmm1 + x1 * A.load(i,j );
3650 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3651 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3652 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3653 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3654 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3655 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3656 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
3659 y.store( j , y.load(j ) + xmm1*factor );
3671 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3673 for(
size_t i=ii; i<iend; ++i ) {
3674 const IntrinsicType x1(
set( x[i] ) );
3675 xmm1 = xmm1 + x1 * A.load(i,j );
3676 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3677 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3678 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3681 y.store( j , y.load(j ) + xmm1*factor );
3689 IntrinsicType xmm1, xmm2, xmm3;
3691 for(
size_t i=ii; i<iend; ++i ) {
3692 const IntrinsicType x1(
set( x[i] ) );
3693 xmm1 = xmm1 + x1 * A.load(i,j );
3694 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3695 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3698 y.store( j , y.load(j ) + xmm1*factor );
3705 IntrinsicType xmm1, xmm2;
3707 for(
size_t i=ii; i<iend; ++i ) {
3708 const IntrinsicType x1(
set( x[i] ) );
3709 xmm1 = xmm1 + x1 * A.load(i,j );
3710 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3713 y.store( j , y.load(j ) + xmm1*factor );
3721 for(
size_t i=ii; i<iend; ++i ) {
3722 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3725 y.store( j, y.load(j) + xmm1*factor );
3728 for( ; remainder && j<jend; ++j )
3732 for(
size_t i=ii; i<iend; ++i ) {
3733 value += x[i] * A(i,j);
3736 y[j] += value * scalar;
3757 template<
typename VT1
3761 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3762 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3764 selectLargeAddAssignKernel( y, x, A, scalar );
// BLAS-based addition assignment kernel for y += (x * A) * scalar, overload
// gated by EnableIf on UseBlasKernel<VT1,VT2,MT1,ST2>.
// NOTE(review): the definition of ET, the declaration of `tmp` and the line
// between the two branches (presumably `else`) are elided from this listing.
3783 template<
typename VT1
3787 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3788 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied to the temporary `tmp`, whose result is
// then added to y.
3792 if( IsTriangular<MT1>::value ) {
3794 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3795 addAssign( y, tmp );
// General matrices: a single gemv call with ET(scalar) and ET(1) as trailing
// arguments (presumably the BLAS alpha/beta factors, so the product is
// accumulated onto y -- confirm against the gemv wrapper).
3798 gemv( y, x, A, ET(scalar), ET(1) );
3820 template<
typename VT1 >
3821 friend inline void subAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3827 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3828 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3830 if( right.rows() == 0UL || right.columns() == 0UL ) {
3842 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel dispatch for y -= (x * A) * scalar.
// The small kernel is chosen when the matrix type is diagonal, when MT is a
// computation that is not evaluated (IsComputation<MT> && !evaluateMatrix), or
// when the element count rows*columns is below TDVECDMATMULT_THRESHOLD.
// NOTE(review): the remaining template parameters and the line between the two
// calls (presumably `else`) are elided from this listing.
3857 template<
typename VT1
3861 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3863 if( ( IsDiagonal<MT1>::value ) ||
3864 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3865 ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3866 selectSmallSubAssignKernel( y, x, A, scalar );
// Otherwise hand over to the BLAS selection stage.
3868 selectBlasSubAssignKernel( y, x, A, scalar );
// Default subtraction assignment kernel: rebuilds the expression x * A * scalar
// and passes it to the subAssign member of the target vector y.
3886 template<
typename VT1
3890 static inline void selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3892 y.subAssign( x * A * scalar );
3910 template<
typename VT1
3914 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3915 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3917 selectDefaultSubAssignKernel( y, x, A, scalar );
3936 template<
typename VT1
3940 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3941 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3943 typedef IntrinsicTrait<ElementType> IT;
3945 const size_t M( A.rows() );
3946 const size_t N( A.columns() );
3948 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3950 const size_t jpos( remainder ? ( N &
size_t(-
IT::size) ) : N );
3953 const IntrinsicType factor(
set( scalar ) );
3959 const size_t ibegin( ( IsLower<MT1>::value )
3960 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3962 const size_t iend( ( IsUpper<MT1>::value )
3963 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3967 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3969 for(
size_t i=ibegin; i<iend; ++i ) {
3970 const IntrinsicType x1(
set( x[i] ) );
3971 xmm1 = xmm1 + x1 * A.load(i,j );
3972 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3973 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3974 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3975 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3976 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3977 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3978 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
3981 y.store( j , y.load(j ) - xmm1*factor );
3993 const size_t ibegin( ( IsLower<MT1>::value )
3994 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3996 const size_t iend( ( IsUpper<MT1>::value )
3997 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4001 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4003 for(
size_t i=ibegin; i<iend; ++i ) {
4004 const IntrinsicType x1(
set( x[i] ) );
4005 xmm1 = xmm1 + x1 * A.load(i,j );
4006 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4007 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4008 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4011 y.store( j , y.load(j ) - xmm1*factor );
4019 const size_t ibegin( ( IsLower<MT1>::value )
4020 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4022 const size_t iend( ( IsUpper<MT1>::value )
4023 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4027 IntrinsicType xmm1, xmm2, xmm3;
4029 for(
size_t i=ibegin; i<iend; ++i ) {
4030 const IntrinsicType x1(
set( x[i] ) );
4031 xmm1 = xmm1 + x1 * A.load(i,j );
4032 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4033 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4036 y.store( j , y.load(j ) - xmm1*factor );
4043 const size_t ibegin( ( IsLower<MT1>::value )
4044 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4046 const size_t iend( ( IsUpper<MT1>::value )
4047 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4051 IntrinsicType xmm1, xmm2;
4053 for(
size_t i=ibegin; i<iend; ++i ) {
4054 const IntrinsicType x1(
set( x[i] ) );
4055 xmm1 = xmm1 + x1 * A.load(i,j );
4056 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
4059 y.store( j , y.load(j ) - xmm1*factor );
4065 const size_t ibegin( ( IsLower<MT1>::value )
4066 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4068 const size_t iend( ( IsUpper<MT1>::value )
4069 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4075 for(
size_t i=ibegin; i<iend; ++i ) {
4076 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
4079 y.store( j, y.load(j) - xmm1*factor );
4082 for( ; remainder && j<N; ++j )
4084 const size_t ibegin( ( IsLower<MT1>::value )
4085 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4087 const size_t iend( ( IsUpper<MT1>::value )
4088 ?(
min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4094 for(
size_t i=ibegin; i<iend; ++i ) {
4095 value += x[i] * A(i,j);
4098 y[j] -= value * scalar;
4117 template<
typename VT1
4121 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4122 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4124 selectDefaultSubAssignKernel( y, x, A, scalar );
4143 template<
typename VT1
4147 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4148 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4150 typedef IntrinsicTrait<ElementType> IT;
4152 const size_t M( A.rows() );
4153 const size_t N( A.columns() );
4155 const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
4157 const size_t jblock( 32768UL /
sizeof( ElementType ) );
4158 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
4160 const IntrinsicType factor(
set( scalar ) );
4164 for(
size_t jj=0U; jj<N; jj+=jblock ) {
4165 for(
size_t ii=0UL; ii<M; ii+=iblock )
4167 const size_t iend(
min( ii+iblock, M ) );
4168 const size_t jtmp(
min( jj+jblock, N ) );
4169 const size_t jend( ( IsLower<MT1>::value )
4170 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
4173 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
4176 size_t j( ( IsUpper<MT1>::value )
4177 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
4182 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4184 for(
size_t i=ii; i<iend; ++i ) {
4185 const IntrinsicType x1(
set( x[i] ) );
4186 xmm1 = xmm1 + x1 * A.load(i,j );
4187 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4188 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4189 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4190 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
4191 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
4192 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
4193 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
4196 y.store( j , y.load(j ) - xmm1*factor );
4208 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4210 for(
size_t i=ii; i<iend; ++i ) {
4211 const IntrinsicType x1(
set( x[i] ) );
4212 xmm1 = xmm1 + x1 * A.load(i,j );
4213 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4214 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4215 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4218 y.store( j , y.load(j ) - xmm1*factor );
4226 IntrinsicType xmm1, xmm2, xmm3;
4228 for(
size_t i=ii; i<iend; ++i ) {
4229 const IntrinsicType x1(
set( x[i] ) );
4230 xmm1 = xmm1 + x1 * A.load(i,j );
4231 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4232 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4235 y.store( j , y.load(j ) - xmm1*factor );
4242 IntrinsicType xmm1, xmm2;
4244 for(
size_t i=ii; i<iend; ++i ) {
4245 const IntrinsicType x1(
set( x[i] ) );
4246 xmm1 = xmm1 + x1 * A.load(i,j );
4247 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
4250 y.store( j , y.load(j ) - xmm1*factor );
4258 for(
size_t i=ii; i<iend; ++i ) {
4259 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
4262 y.store( j, y.load(j) - xmm1*factor );
4265 for( ; remainder && j<jend; ++j )
4269 for(
size_t i=ii; i<iend; ++i ) {
4270 value += x[i] * A(i,j);
4273 y[j] -= value * scalar;
4294 template<
typename VT1
4298 static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
4299 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4301 selectLargeSubAssignKernel( y, x, A, scalar );
// BLAS-based subtraction assignment kernel for y -= (x * A) * scalar, overload
// gated by EnableIf on UseBlasKernel<VT1,VT2,MT1,ST2>.
// NOTE(review): the definition of ET, the declaration of `tmp` and the line
// between the two branches (presumably `else`) are elided from this listing.
4320 template<
typename VT1
4324 static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
4325 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied to the temporary `tmp`, whose result is
// then subtracted from y.
4329 if( IsTriangular<MT1>::value ) {
4331 trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4332 subAssign( y, tmp );
// General matrices: a single gemv call with the negated scalar and ET(1) as
// trailing arguments (presumably the BLAS alpha/beta factors, so the scaled
// product is subtracted from y -- confirm against the gemv wrapper).
4335 gemv( y, x, A, ET(-scalar), ET(1) );
// Multiplication assignment of the scaled vector/matrix product to a dense row
// vector: the expression is evaluated via serial() into a temporary of
// ResultType, which is then passed to multAssign for the target.
4357 template<
typename VT1 >
4358 friend inline void multAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4368 const ResultType tmp(
serial( rhs ) );
4369 multAssign( ~lhs, tmp );
4391 template<
typename VT1 >
4392 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4393 smpAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4399 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4400 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4402 if( right.rows() == 0UL ) {
4406 else if( right.columns() == 0UL ) {
4436 template<
typename VT1 >
4437 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4438 smpAssign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4448 const ResultType tmp( rhs );
4467 template<
typename VT1 >
4468 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4469 smpAddAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4475 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4476 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4478 if( right.rows() == 0UL || right.columns() == 0UL ) {
4512 template<
typename VT1 >
4513 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4514 smpSubAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4520 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4521 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4523 if( right.rows() == 0UL || right.columns() == 0UL ) {
4558 template<
typename VT1 >
4559 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4560 smpMultAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4570 const ResultType tmp( rhs );
4633 template<
typename T1
4635 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
4640 if( (~vec).
size() != (~mat).
rows() ) {
4670 template<
typename T1
4673 inline const typename EnableIf< IsMatMatMultExpr<T2>,
typename MultExprTrait<T1,T2>::Type >::Type
4695 template<
typename VT,
typename MT >
4712 template<
typename VT,
typename MT >
4714 :
public IsTrue< And< IsAligned<VT>, IsAligned<MT> >::value >
4730 template<
typename VT,
typename MT,
bool AF >
4735 typedef typename MultExprTrait< typename SubvectorExprTrait<const VT,AF>::Type
4736 ,
typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:126
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:206
Header file for mathematical functions.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A=B*C$ ).
Definition: DMatDMatMultExpr.h:7820
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:320
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:129
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:308
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:384
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:219
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:310
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:207
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:204
Header file for the IsComplexDouble type trait.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:202
Constraint on the data type.
Header file for the MultExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:374
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:330
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:131
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:261
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:255
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:127
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:241
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:216
Header file for the IsMatMatMultExpr type trait class.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the Columns type trait.
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:213
Header file for the IsBlasCompatible type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:203
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:354
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:205
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:385
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:364
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:201
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:130
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:79
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:138
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Header file for the reset shim.
Constraint on the data type.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
Constraint on the data type.
Expression object for transpose dense vector-dense matrix multiplications. The TDVecDMatMultExpr class...
Definition: Forward.h:146
Header file for the HasMutableDataAccess type trait.
Header file for all intrinsic functionality.
Header file for BLAS general matrix/vector multiplication functions (gemv)
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDVecDMatMultExpr.h:297
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:128
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:258
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:342
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the IsComplexFloat type trait.
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: RowVector.h:79
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the IsUpper type trait.
Header file for exception macros.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:210
Constraint on the transpose flag of vector types.
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.