35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
// Class header of TDVecDMatMultExpr. The class publicly derives from
// DenseVector< TDVecDMatMultExpr<VT,MT>, true >, passing itself as the first
// template argument, and privately derives from TVecMatMultExpr and Computation.
// NOTE(review): the template parameter MT is used below but its declaration line
// is not part of this excerpt.
112 template<
typename VT
114 class TDVecDMatMultExpr :
public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
115 ,
private TVecMatMultExpr
116 ,
private Computation
// Compile-time switch used with EnableIf further down in this file.
// `value` is non-zero when evaluateVector or evaluateMatrix is set.
// The template parameter T1 does not take part in the computation.
145 template<
typename T1 >
146 struct UseSMPAssign {
147 enum { value = ( evaluateVector || evaluateMatrix ) };
// Compile-time predicate over three types; it is instantiated below as
// <VT1,VT2,MT1> to enable the sgemv/strmv-based kernels.
// The visible conjunction requires:
//   - mutable data access on T1, const data access on T2 and T3,
//   - T3 is not diagonal,
//   - all three types are vectorizable,
//   - all three element types satisfy IsFloat.
// NOTE(review): the line that opens the enumeration is not part of this excerpt.
158 template<
typename T1,
typename T2,
typename T3 >
159 struct UseSinglePrecisionKernel {
161 HasMutableDataAccess<T1>::value &&
162 HasConstDataAccess<T2>::value &&
163 HasConstDataAccess<T3>::value &&
164 !IsDiagonal<T3>::value &&
165 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
166 IsFloat<typename T1::ElementType>::value &&
167 IsFloat<typename T2::ElementType>::value &&
168 IsFloat<typename T3::ElementType>::value };
// Compile-time predicate over three types; it is instantiated below as
// <VT1,VT2,MT1> to enable the dgemv/dtrmv-based kernels.
// Same data-access, non-diagonal and vectorizable requirements as the
// single-precision predicate, but all three element types must satisfy IsDouble.
// NOTE(review): the line that opens the enumeration is not part of this excerpt.
179 template<
typename T1,
typename T2,
typename T3 >
180 struct UseDoublePrecisionKernel {
182 HasMutableDataAccess<T1>::value &&
183 HasConstDataAccess<T2>::value &&
184 HasConstDataAccess<T3>::value &&
185 !IsDiagonal<T3>::value &&
186 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
187 IsDouble<typename T1::ElementType>::value &&
188 IsDouble<typename T2::ElementType>::value &&
189 IsDouble<typename T3::ElementType>::value };
// Compile-time predicate over three types; it is instantiated below as
// <VT1,VT2,MT1> to enable the cgemv/ctrmv-based kernels.
// Same data-access, non-diagonal and vectorizable requirements as the real-valued
// predicates; in addition every element type must be exactly complex<float>
// (checked with IsSame against the local typedef `Type`).
// NOTE(review): the line that opens the enumeration is not part of this excerpt.
200 template<
typename T1,
typename T2,
typename T3 >
201 struct UseSinglePrecisionComplexKernel {
202 typedef complex<float> Type;
204 HasMutableDataAccess<T1>::value &&
205 HasConstDataAccess<T2>::value &&
206 HasConstDataAccess<T3>::value &&
207 !IsDiagonal<T3>::value &&
208 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209 IsSame<typename T1::ElementType,Type>::value &&
210 IsSame<typename T2::ElementType,Type>::value &&
211 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate over three types; it is instantiated below as
// <VT1,VT2,MT1> to enable the zgemv/ztrmv-based kernels.
// Same data-access, non-diagonal and vectorizable requirements as the real-valued
// predicates; in addition every element type must be exactly complex<double>
// (checked with IsSame against the local typedef `Type`).
// NOTE(review): the line that opens the enumeration is not part of this excerpt.
222 template<
typename T1,
typename T2,
typename T3 >
223 struct UseDoublePrecisionComplexKernel {
224 typedef complex<double> Type;
226 HasMutableDataAccess<T1>::value &&
227 HasConstDataAccess<T2>::value &&
228 HasConstDataAccess<T3>::value &&
229 !IsDiagonal<T3>::value &&
230 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
231 IsSame<typename T1::ElementType,Type>::value &&
232 IsSame<typename T2::ElementType,Type>::value &&
233 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS code path.
// `value` is non-zero when BLAZE_BLAS_MODE is off, or when none of the four
// BLAS-kernel predicates (single/double precision, real/complex) holds for
// the given type triple.
243 template<
typename T1,
typename T2,
typename T3 >
244 struct UseDefaultKernel {
245 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
246 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
247 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
248 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsics-based kernels over the scalar
// default kernels (used with EnableIf/DisableIf on the Small/Large kernels below).
// `value` is non-zero when T3 is not diagonal, all three types are vectorizable,
// T1, T2 and T3 share one element type, and IntrinsicTrait of that element type
// reports both `addition` and `multiplication`.
259 template<
typename T1,
typename T2,
typename T3 >
260 struct UseVectorizedDefaultKernel {
261 enum { value = !IsDiagonal<T3>::value &&
262 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
263 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
264 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
265 IntrinsicTrait<typename T1::ElementType>::addition &&
266 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compile-time flags of the expression.
// The first line is the tail of a flag definition whose opening line is not part
// of this excerpt; it requires both operand types to be vectorizable.
// smpAssignable is set only if neither evaluateVector nor evaluateMatrix is set
// and both operand types are themselves smpAssignable.
297 VT::vectorizable && MT::vectorizable &&
303 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
304 !evaluateMatrix && MT::smpAssignable };
// Element access fragment (signature and branch structure are not part of this
// excerpt). Two computations are visible:
//   1. a single product vec_[index] * mat_(index,index), and
//   2. an accumulation over rows [ibegin,iend) of vec_[i] * mat_(i,index).
// NOTE(review): presumably (1) is the diagonal-matrix shortcut -- confirm against
// the full source.
336 return vec_[index] *
mat_(index,index);
// inum is the number of rows to visit. The first row seeds `res`; the remaining
// inum-1 rows are rounded down to an even count (mask size_t(-2) clears the
// lowest bit) so the loop can advance in steps of two up to ipos.
346 const size_t inum( iend - ibegin );
347 const size_t ipos( ibegin + ( ( inum - 1UL ) &
size_t(-2) ) + 1UL );
349 ElementType res(
vec_[ibegin] *
mat_(ibegin,index) );
351 for(
size_t i=ibegin+1UL; i<ipos; i+=2UL ) {
// Contribution of the row at ipos (the guarding condition is not visible here).
355 res +=
vec_[ipos] *
mat_(ipos,index);
// Returns the number of columns of the matrix operand mat_.
368 return mat_.columns();
// Two alias-query member templates (their names and signatures are not part of
// this excerpt). Both visible bodies are identical: the query is forwarded to the
// operands and the result is true if either vec_ or mat_ reports being aliased
// with `alias`.
398 template<
typename T >
400 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
410 template<
typename T >
412 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
// The expression counts as aligned only if both operands report being aligned.
422 return vec_.isAligned() &&
mat_.isAligned();
// Assignment of the expression `rhs` to a target `lhs` (signature not part of
// this excerpt). Visible steps:
//   - separate early-out checks for a matrix with zero rows and with zero columns
//     (what each branch does is not visible here),
//   - both operands are passed through serial() and bound to x (type LT) and
//     A (type RT),
//   - the work is handed to selectAssignKernel with the target unwrapped via `~lhs`.
458 template<
typename VT1 >
465 if( rhs.mat_.rows() == 0UL ) {
469 else if( rhs.mat_.columns() == 0UL ) {
473 LT x(
serial( rhs.vec_ ) );
474 RT A(
serial( rhs.mat_ ) );
481 TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
// Kernel dispatcher for y = x * A.
// Forwards to either selectSmallAssignKernel or selectBlasAssignKernel; the
// condition that chooses between the two is not part of this excerpt.
497 template<
typename VT1
500 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
505 selectSmallAssignKernel( y, x, A );
507 selectBlasAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for y = x * A.
// y is filled row by row of A: row 0 initialises y with plain assignments, the
// remaining rows accumulate with +=. The compile-time traits IsLower / IsUpper /
// IsStrictly* / IsDiagonal narrow the column range that is touched per row.
// NOTE(review): several closing braces and the bodies of the IsStrictlyUpper /
// IsStrictlyLower branches are not part of this excerpt.
526 template<
typename VT1
529 static inline void selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// M = number of rows, N = number of columns of A.
531 const size_t M( A.rows() );
532 const size_t N( A.columns() );
534 if( IsStrictlyUpper<MT1>::value ) {
// Unless A is lower, row 0 seeds y[j] for every column from jbegin on
// (column 0 is skipped for strictly upper matrices).
538 if( !IsLower<MT1>::value )
540 const size_t jbegin( IsStrictlyUpper<MT1>::value ? 1UL : 0UL );
541 for(
size_t j=jbegin; j<N; ++j ) {
542 y[j] = x[0UL] * A(0UL,j);
// Remaining rows. The loop starts at row 0 only for lower (but not strictly
// lower) matrices, because in that case the block above did not run.
546 for(
size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
// Diagonal matrices contribute exactly one product per row.
548 if( IsDiagonal<MT1>::value )
550 y[i] = x[i] * A(i,i);
// Column range [jbegin,jend) of row i; the alternatives used for
// non-triangular matrices are on lines not visible in this excerpt.
554 const size_t jbegin( ( IsUpper<MT1>::value )
555 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
557 const size_t jend( ( IsLower<MT1>::value )
558 ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
// Two columns per iteration; jpos is jbegin plus jnum rounded down to an even
// count (mask size_t(-2) clears the lowest bit).
562 const size_t jnum( jend - jbegin );
563 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
565 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
566 y[j ] += x[i] * A(i,j );
567 y[j+1UL] += x[i] * A(i,j+1UL);
// Leftover column at jpos (its guarding condition is not visible here).
570 y[jpos] += x[i] * A(i,jpos);
// For lower matrices element jend is written with `=` rather than `+=`:
// jend is excluded from the accumulation range above.
572 if( IsLower<MT1>::value ) {
573 y[jend] = x[i] * A(i,jend);
578 if( IsStrictlyLower<MT1>::value ) {
// selectSmallAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel does
// not hold (DisableIf). Simply forwards to the scalar default kernel.
599 template<
typename VT1
602 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
603 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
605 selectDefaultAssignKernel( y, x, A );
// selectSmallAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Intrinsics-based kernel for y = x * A.
// The body consists of five variants that process 8, 4, 3, 2 and 1 packed
// column groups of width IT::size at a time. In each variant the row range
// [ibegin,iend) is narrowed for triangular matrices, every row builds an
// IntrinsicType from x[i] via set() and accumulates x[i] * A.load(i, j+k*IT::size)
// into register k.
// NOTE(review): the enclosing column loops, the non-triangular alternatives of
// ibegin/iend and the stores of the accumulators to y are not part of this excerpt.
624 template<
typename VT1
627 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
628 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
630 typedef IntrinsicTrait<ElementType> IT;
632 const size_t M( A.rows() );
633 const size_t N( A.columns() );
// --- 8 accumulators: columns j .. j+8*IT::size ---
639 const size_t ibegin( ( IsLower<MT1>::value )
640 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
642 const size_t iend( ( IsUpper<MT1>::value )
643 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
647 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
649 for(
size_t i=ibegin; i<iend; ++i ) {
650 const IntrinsicType x1(
set( x[i] ) );
651 xmm1 = xmm1 + x1 * A.load(i,j );
652 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
653 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
654 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
655 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
656 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
657 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
658 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// --- 4 accumulators: columns j .. j+4*IT::size ---
673 const size_t ibegin( ( IsLower<MT1>::value )
674 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
676 const size_t iend( ( IsUpper<MT1>::value )
677 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
681 IntrinsicType xmm1, xmm2, xmm3, xmm4;
683 for(
size_t i=ibegin; i<iend; ++i ) {
684 const IntrinsicType x1(
set( x[i] ) );
685 xmm1 = xmm1 + x1 * A.load(i,j );
686 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
687 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
688 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
// --- 3 accumulators: columns j .. j+3*IT::size ---
699 const size_t ibegin( ( IsLower<MT1>::value )
700 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
702 const size_t iend( ( IsUpper<MT1>::value )
703 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
707 IntrinsicType xmm1, xmm2, xmm3;
709 for(
size_t i=ibegin; i<iend; ++i ) {
710 const IntrinsicType x1(
set( x[i] ) );
711 xmm1 = xmm1 + x1 * A.load(i,j );
712 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
713 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
// --- 2 accumulators: columns j .. j+2*IT::size ---
723 const size_t ibegin( ( IsLower<MT1>::value )
724 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
726 const size_t iend( ( IsUpper<MT1>::value )
727 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
731 IntrinsicType xmm1, xmm2;
733 for(
size_t i=ibegin; i<iend; ++i ) {
734 const IntrinsicType x1(
set( x[i] ) );
735 xmm1 = xmm1 + x1 * A.load(i,j );
736 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
// --- 1 accumulator: columns j .. j+IT::size ---
745 const size_t ibegin( ( IsLower<MT1>::value )
746 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
748 const size_t iend( ( IsUpper<MT1>::value )
749 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
755 for(
size_t i=ibegin; i<iend; ++i ) {
756 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// selectLargeAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel does
// not hold (DisableIf). Simply forwards to the scalar default kernel.
779 template<
typename VT1
782 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
783 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
785 selectDefaultAssignKernel( y, x, A );
// selectLargeAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Blocked, intrinsics-based kernel for y = x * A.
// The matrix is walked in tiles of jblock columns by iblock rows. Within a tile
// the partial products of rows [ii,iend) are summed in registers and then added
// onto the current content of y via y.store( j, y.load(j) + xmm... ).
// NOTE(review): because every store adds to y.load(j), y has to hold its initial
// value before the tiles are processed; that step is not part of this excerpt.
804 template<
typename VT1
807 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
808 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
810 typedef IntrinsicTrait<ElementType> IT;
812 const size_t M( A.rows() );
813 const size_t N( A.columns() );
// Tile sizes: jblock is 32768 bytes worth of elements; iblock is 8 rows when the
// matrix has fewer than jblock columns and 4 rows otherwise.
815 const size_t jblock( 32768UL /
sizeof( ElementType ) );
816 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
822 for(
size_t jj=0U; jj<N; jj+=jblock ) {
823 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Tile bounds, clamped to the matrix and narrowed for triangular matrices.
825 const size_t iend(
min( ii+iblock, M ) );
826 const size_t jtmp(
min( jj+jblock, N ) );
827 const size_t jend( ( IsLower<MT1>::value )
828 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is masked with size_t(-IT::size), which
// rounds it down to a multiple of IT::size when IT::size is a power of two.
831 size_t j( ( IsUpper<MT1>::value )
832 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 accumulators ---
837 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
839 for(
size_t i=ii; i<iend; ++i ) {
840 const IntrinsicType x1(
set( x[i] ) );
841 xmm1 = xmm1 + x1 * A.load(i,j );
842 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
843 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
844 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
845 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
846 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
847 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
848 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
851 y.store( j , y.load(j ) + xmm1 );
// --- 4 accumulators ---
863 IntrinsicType xmm1, xmm2, xmm3, xmm4;
865 for(
size_t i=ii; i<iend; ++i ) {
866 const IntrinsicType x1(
set( x[i] ) );
867 xmm1 = xmm1 + x1 * A.load(i,j );
868 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
869 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
870 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
873 y.store( j , y.load(j ) + xmm1 );
// --- 3 accumulators ---
881 IntrinsicType xmm1, xmm2, xmm3;
883 for(
size_t i=ii; i<iend; ++i ) {
884 const IntrinsicType x1(
set( x[i] ) );
885 xmm1 = xmm1 + x1 * A.load(i,j );
886 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
887 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
890 y.store( j , y.load(j ) + xmm1 );
// --- 2 accumulators ---
897 IntrinsicType xmm1, xmm2;
899 for(
size_t i=ii; i<iend; ++i ) {
900 const IntrinsicType x1(
set( x[i] ) );
901 xmm1 = xmm1 + x1 * A.load(i,j );
902 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
905 y.store( j , y.load(j ) + xmm1 );
// --- 1 accumulator ---
913 for(
size_t i=ii; i<iend; ++i ) {
914 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
917 y.store( j, y.load(j) + xmm1 );
// selectBlasAssignKernel overload set for y = x * A. Exactly one overload is
// enabled per type triple through the Use*Kernel predicates.
//
// Fallback (UseDefaultKernel): no BLAS routine applies, forward to the large kernel.
939 template<
typename VT1
942 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
943 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
945 selectLargeAssignKernel( y, x, A );
// Single precision: triangular matrices go through strmv with the matching
// CblasLower/CblasUpper flag; the sgemv call uses the scalars 1.0F and 0.0F.
// NOTE(review): the statements between the visible lines (including the branch
// that separates the trmv and gemv calls) are not part of this excerpt.
965 template<
typename VT1
968 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
969 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
971 if( IsTriangular<MT1>::value ) {
973 strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
976 sgemv( y, x, A, 1.0F, 0.0F );
// Double precision: same structure with dtrmv / dgemv and scalars 1.0 and 0.0.
998 template<
typename VT1
1001 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1002 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1004 if( IsTriangular<MT1>::value ) {
1006 dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1009 dgemv( y, x, A, 1.0, 0.0 );
// Single precision complex: ctrmv / cgemv with scalars (1,0) and (0,0).
1031 template<
typename VT1
1034 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1035 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1037 if( IsTriangular<MT1>::value ) {
1039 ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1042 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// Double precision complex: ztrmv / zgemv with scalars (1,0) and (0,0).
1064 template<
typename VT1
1067 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1068 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1070 if( IsTriangular<MT1>::value ) {
1072 ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1075 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Fragment of an assignment overload whose signature is not part of this excerpt.
// The expression is first evaluated (through serial()) into a temporary of type
// ResultType; how the temporary is then used is not visible here.
1095 template<
typename VT1 >
1106 const ResultType tmp(
serial( rhs ) );
// Addition assignment of the expression `rhs` to a target `lhs` (signature not
// part of this excerpt). Visible steps:
//   - a single early-out check for a matrix with zero rows or zero columns,
//   - both operands are passed through serial() and bound to x (type LT) and
//     A (type RT),
//   - the work is handed to selectAddAssignKernel with the target unwrapped via `~lhs`.
1125 template<
typename VT1 >
1132 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1136 LT x(
serial( rhs.vec_ ) );
1137 RT A(
serial( rhs.mat_ ) );
1144 TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
// Kernel dispatcher for y += x * A.
// The small kernel is taken when A is diagonal, or when the matrix operand type MT
// is a computation that is not evaluated up front (IsComputation<MT> &&
// !evaluateMatrix), or when a further condition holds whose line is not part of
// this excerpt. The BLAS kernel call is the other visible target.
1160 template<
typename VT1
1163 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1165 if( ( IsDiagonal<MT1>::value ) ||
1166 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1168 selectSmallAddAssignKernel( y, x, A );
1170 selectBlasAddAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for y += x * A.
// Every row i of A adds x[i] * A(i,j) onto y[j]; compile-time traits narrow the
// column range for triangular matrices and reduce it to one product for
// diagonal matrices.
1189 template<
typename VT1
1192 static inline void selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// M = number of rows, N = number of columns of A.
1194 const size_t M( A.rows() );
1195 const size_t N( A.columns() );
1197 for(
size_t i=0UL; i<M; ++i )
1199 if( IsDiagonal<MT1>::value )
1201 y[i] += x[i] * A(i,i);
// Column range [jbegin,jend) of row i; the alternatives used for
// non-triangular matrices are on lines not visible in this excerpt.
1205 const size_t jbegin( ( IsUpper<MT1>::value )
1206 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1208 const size_t jend( ( IsLower<MT1>::value )
1209 ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
// Two columns per iteration; jpos is jbegin plus jnum rounded down to an even
// count (mask size_t(-2) clears the lowest bit).
1213 const size_t jnum( jend - jbegin );
1214 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
1216 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1217 y[j ] += x[i] * A(i,j );
1218 y[j+1UL] += x[i] * A(i,j+1UL);
// Leftover column at jpos (its guarding condition is not visible here).
1221 y[jpos] += x[i] * A(i,jpos);
// selectSmallAddAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel
// does not hold (DisableIf). Simply forwards to the scalar default kernel.
1243 template<
typename VT1
1246 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1247 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1249 selectDefaultAddAssignKernel( y, x, A );
// selectSmallAddAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Intrinsics-based kernel for y += x * A.
// Five variants process 8, 4, 3, 2 and 1 packed column groups of width IT::size.
// In each variant the accumulators are initialised from the current content of y
// (y.load), every row in [ibegin,iend) adds x[i] * A.load(i, j+k*IT::size), and
// the result is written back with y.store.
// NOTE(review): the enclosing column loops, the non-triangular alternatives of
// ibegin/iend and all but the first store of each variant are not part of this
// excerpt.
1268 template<
typename VT1
1271 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1272 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1274 typedef IntrinsicTrait<ElementType> IT;
1276 const size_t M( A.rows() );
1277 const size_t N( A.columns() );
// --- 8 accumulators: columns j .. j+8*IT::size ---
1283 const size_t ibegin( ( IsLower<MT1>::value )
1284 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1286 const size_t iend( ( IsUpper<MT1>::value )
1287 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1291 IntrinsicType xmm1( y.load(j ) );
1292 IntrinsicType xmm2( y.load(j+
IT::size ) );
1293 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1294 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1295 IntrinsicType xmm5( y.load(j+
IT::size*4UL) );
1296 IntrinsicType xmm6( y.load(j+
IT::size*5UL) );
1297 IntrinsicType xmm7( y.load(j+
IT::size*6UL) );
1298 IntrinsicType xmm8( y.load(j+
IT::size*7UL) );
1300 for(
size_t i=ibegin; i<iend; ++i ) {
1301 const IntrinsicType x1(
set( x[i] ) );
1302 xmm1 = xmm1 + x1 * A.load(i,j );
1303 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1304 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1305 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1306 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1307 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1308 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1309 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
1312 y.store( j , xmm1 );
// --- 4 accumulators: columns j .. j+4*IT::size ---
1324 const size_t ibegin( ( IsLower<MT1>::value )
1325 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1327 const size_t iend( ( IsUpper<MT1>::value )
1328 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1332 IntrinsicType xmm1( y.load(j ) );
1333 IntrinsicType xmm2( y.load(j+
IT::size ) );
1334 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1335 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1337 for(
size_t i=ibegin; i<iend; ++i ) {
1338 const IntrinsicType x1(
set( x[i] ) );
1339 xmm1 = xmm1 + x1 * A.load(i,j );
1340 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1341 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1342 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1345 y.store( j , xmm1 );
// --- 3 accumulators: columns j .. j+3*IT::size ---
1353 const size_t ibegin( ( IsLower<MT1>::value )
1354 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1356 const size_t iend( ( IsUpper<MT1>::value )
1357 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1361 IntrinsicType xmm1( y.load(j ) );
1362 IntrinsicType xmm2( y.load(j+
IT::size ) );
1363 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1365 for(
size_t i=ibegin; i<iend; ++i ) {
1366 const IntrinsicType x1(
set( x[i] ) );
1367 xmm1 = xmm1 + x1 * A.load(i,j );
1368 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1369 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1372 y.store( j , xmm1 );
// --- 2 accumulators: columns j .. j+2*IT::size ---
1379 const size_t ibegin( ( IsLower<MT1>::value )
1380 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1382 const size_t iend( ( IsUpper<MT1>::value )
1383 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1387 IntrinsicType xmm1( y.load(j ) );
1388 IntrinsicType xmm2( y.load(j+
IT::size) );
1390 for(
size_t i=ibegin; i<iend; ++i ) {
1391 const IntrinsicType x1(
set( x[i] ) );
1392 xmm1 = xmm1 + x1 * A.load(i,j );
1393 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
1396 y.store( j , xmm1 );
// --- 1 accumulator: columns j .. j+IT::size ---
1402 const size_t ibegin( ( IsLower<MT1>::value )
1403 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1405 const size_t iend( ( IsUpper<MT1>::value )
1406 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1410 IntrinsicType xmm1( y.load(j) );
1412 for(
size_t i=ibegin; i<iend; ++i ) {
1413 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// selectLargeAddAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel
// does not hold (DisableIf). Simply forwards to the scalar default kernel.
1436 template<
typename VT1
1439 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1440 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1442 selectDefaultAddAssignKernel( y, x, A );
// selectLargeAddAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Blocked, intrinsics-based kernel for y += x * A.
// The matrix is walked in tiles of jblock columns by iblock rows. Within a tile
// the partial products of rows [ii,iend) are summed in registers and then added
// onto the current content of y via y.store( j, y.load(j) + xmm... ).
1461 template<
typename VT1
1464 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1465 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1467 typedef IntrinsicTrait<ElementType> IT;
1469 const size_t M( A.rows() );
1470 const size_t N( A.columns() );
// Tile sizes: jblock is 32768 bytes worth of elements; iblock is 8 rows when the
// matrix has fewer than jblock columns and 4 rows otherwise.
1472 const size_t jblock( 32768UL /
sizeof( ElementType ) );
1473 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1477 for(
size_t jj=0U; jj<N; jj+=jblock ) {
1478 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Tile bounds, clamped to the matrix and narrowed for triangular matrices.
1480 const size_t iend(
min( ii+iblock, M ) );
1481 const size_t jtmp(
min( jj+jblock, N ) );
1482 const size_t jend( ( IsLower<MT1>::value )
1483 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is masked with size_t(-IT::size), which
// rounds it down to a multiple of IT::size when IT::size is a power of two.
1486 size_t j( ( IsUpper<MT1>::value )
1487 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 accumulators ---
1492 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1494 for(
size_t i=ii; i<iend; ++i ) {
1495 const IntrinsicType x1(
set( x[i] ) );
1496 xmm1 = xmm1 + x1 * A.load(i,j );
1497 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1498 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1499 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1500 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
1501 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
1502 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
1503 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
1506 y.store( j , y.load(j ) + xmm1 );
// --- 4 accumulators ---
1518 IntrinsicType xmm1, xmm2, xmm3, xmm4;
1520 for(
size_t i=ii; i<iend; ++i ) {
1521 const IntrinsicType x1(
set( x[i] ) );
1522 xmm1 = xmm1 + x1 * A.load(i,j );
1523 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1524 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1525 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
1528 y.store( j , y.load(j ) + xmm1 );
// --- 3 accumulators ---
1536 IntrinsicType xmm1, xmm2, xmm3;
1538 for(
size_t i=ii; i<iend; ++i ) {
1539 const IntrinsicType x1(
set( x[i] ) );
1540 xmm1 = xmm1 + x1 * A.load(i,j );
1541 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
1542 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
1545 y.store( j , y.load(j ) + xmm1 );
// --- 2 accumulators ---
1552 IntrinsicType xmm1, xmm2;
1554 for(
size_t i=ii; i<iend; ++i ) {
1555 const IntrinsicType x1(
set( x[i] ) );
1556 xmm1 = xmm1 + x1 * A.load(i,j );
1557 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
1560 y.store( j , y.load(j ) + xmm1 );
// --- 1 accumulator ---
1568 for(
size_t i=ii; i<iend; ++i ) {
1569 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
1572 y.store( j, y.load(j) + xmm1 );
// selectBlasAddAssignKernel overload set for y += x * A. Exactly one overload is
// enabled per type triple through the Use*Kernel predicates.
//
// Fallback (UseDefaultKernel): no BLAS routine applies, forward to the large kernel.
1594 template<
typename VT1
1597 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1598 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1600 selectLargeAddAssignKernel( y, x, A );
// Single precision: for triangular matrices strmv operates on `tmp` rather than on
// y; the sgemv call uses scalars 1.0F and 1.0F.
// NOTE(review): the declaration of `tmp`, the statement that combines it with y
// and the branch separating the trmv and gemv calls are not part of this excerpt.
1620 template<
typename VT1
1623 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1624 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1626 if( IsTriangular<MT1>::value ) {
1628 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1632 sgemv( y, x, A, 1.0F, 1.0F );
// Double precision: same structure with dtrmv / dgemv and scalars 1.0 and 1.0.
1654 template<
typename VT1
1657 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1658 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1660 if( IsTriangular<MT1>::value ) {
1662 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1666 dgemv( y, x, A, 1.0, 1.0 );
// Single precision complex: ctrmv / cgemv with scalars (1,0) and (1,0).
1688 template<
typename VT1
1691 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1692 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1694 if( IsTriangular<MT1>::value ) {
1696 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1700 cgemv( y, x, A, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double precision complex: ztrmv / zgemv with scalars (1,0) and (1,0).
1722 template<
typename VT1
1725 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1726 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1728 if( IsTriangular<MT1>::value ) {
1730 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1734 zgemv( y, x, A, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the expression `rhs` to a target `lhs` (signature not
// part of this excerpt). Visible steps:
//   - a single early-out check for a matrix with zero rows or zero columns,
//   - both operands are passed through serial() and bound to x (type LT) and
//     A (type RT),
//   - the work is handed to selectSubAssignKernel with the target unwrapped via `~lhs`.
1758 template<
typename VT1 >
1765 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1769 LT x(
serial( rhs.vec_ ) );
1770 RT A(
serial( rhs.mat_ ) );
1777 TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
// Kernel dispatcher for y -= x * A.
// The small kernel is taken when A is diagonal, or when the matrix operand type MT
// is a computation that is not evaluated up front (IsComputation<MT> &&
// !evaluateMatrix), or when a further condition holds whose line is not part of
// this excerpt. The BLAS kernel call is the other visible target.
1793 template<
typename VT1
1796 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1798 if( ( IsDiagonal<MT1>::value ) ||
1799 ( IsComputation<MT>::value && !evaluateMatrix ) ||
1801 selectSmallSubAssignKernel( y, x, A );
1803 selectBlasSubAssignKernel( y, x, A );
// Scalar (non-vectorized) kernel for y -= x * A.
// Every row i of A subtracts x[i] * A(i,j) from y[j]; compile-time traits narrow
// the column range for triangular matrices and reduce it to one product for
// diagonal matrices.
1822 template<
typename VT1
1825 static inline void selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// M = number of rows, N = number of columns of A.
1827 const size_t M( A.rows() );
1828 const size_t N( A.columns() );
1830 for(
size_t i=0UL; i<M; ++i )
1832 if( IsDiagonal<MT1>::value )
1834 y[i] -= x[i] * A(i,i);
// Column range [jbegin,jend) of row i; the alternatives used for
// non-triangular matrices are on lines not visible in this excerpt.
1838 const size_t jbegin( ( IsUpper<MT1>::value )
1839 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1841 const size_t jend( ( IsLower<MT1>::value )
1842 ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
// Two columns per iteration; jpos is jbegin plus jnum rounded down to an even
// count (mask size_t(-2) clears the lowest bit).
1846 const size_t jnum( jend - jbegin );
1847 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
1849 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1850 y[j ] -= x[i] * A(i,j );
1851 y[j+1UL] -= x[i] * A(i,j+1UL);
// Leftover column at jpos (its guarding condition is not visible here).
1854 y[jpos] -= x[i] * A(i,jpos);
// selectSmallSubAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel
// does not hold (DisableIf). Simply forwards to the scalar default kernel.
1876 template<
typename VT1
1879 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1880 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1882 selectDefaultSubAssignKernel( y, x, A );
// selectSmallSubAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Intrinsics-based kernel for y -= x * A.
// Five variants process 8, 4, 3, 2 and 1 packed column groups of width IT::size.
// In each variant the accumulators are initialised from the current content of y
// (y.load), every row in [ibegin,iend) subtracts x[i] * A.load(i, j+k*IT::size),
// and the result is written back with y.store.
// NOTE(review): the enclosing column loops, the non-triangular alternatives of
// ibegin/iend and all but the first store of each variant are not part of this
// excerpt.
1902 template<
typename VT1
1905 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1906 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1908 typedef IntrinsicTrait<ElementType> IT;
1910 const size_t M( A.rows() );
1911 const size_t N( A.columns() );
// --- 8 accumulators: columns j .. j+8*IT::size ---
1917 const size_t ibegin( ( IsLower<MT1>::value )
1918 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1920 const size_t iend( ( IsUpper<MT1>::value )
1921 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1925 IntrinsicType xmm1( y.load(j ) );
1926 IntrinsicType xmm2( y.load(j+
IT::size ) );
1927 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1928 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1929 IntrinsicType xmm5( y.load(j+
IT::size*4UL) );
1930 IntrinsicType xmm6( y.load(j+
IT::size*5UL) );
1931 IntrinsicType xmm7( y.load(j+
IT::size*6UL) );
1932 IntrinsicType xmm8( y.load(j+
IT::size*7UL) );
1934 for(
size_t i=ibegin; i<iend; ++i ) {
1935 const IntrinsicType x1(
set( x[i] ) );
1936 xmm1 = xmm1 - x1 * A.load(i,j );
1937 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1938 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1939 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1940 xmm5 = xmm5 - x1 * A.load(i,j+
IT::size*4UL);
1941 xmm6 = xmm6 - x1 * A.load(i,j+
IT::size*5UL);
1942 xmm7 = xmm7 - x1 * A.load(i,j+
IT::size*6UL);
1943 xmm8 = xmm8 - x1 * A.load(i,j+
IT::size*7UL);
1946 y.store( j , xmm1 );
// --- 4 accumulators: columns j .. j+4*IT::size ---
1958 const size_t ibegin( ( IsLower<MT1>::value )
1959 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1961 const size_t iend( ( IsUpper<MT1>::value )
1962 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1966 IntrinsicType xmm1( y.load(j ) );
1967 IntrinsicType xmm2( y.load(j+
IT::size ) );
1968 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1969 IntrinsicType xmm4( y.load(j+
IT::size*3UL) );
1971 for(
size_t i=ibegin; i<iend; ++i ) {
1972 const IntrinsicType x1(
set( x[i] ) );
1973 xmm1 = xmm1 - x1 * A.load(i,j );
1974 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
1975 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
1976 xmm4 = xmm4 - x1 * A.load(i,j+
IT::size*3UL);
1979 y.store( j , xmm1 );
// --- 3 accumulators: columns j .. j+3*IT::size ---
1987 const size_t ibegin( ( IsLower<MT1>::value )
1988 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1990 const size_t iend( ( IsUpper<MT1>::value )
1991 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1995 IntrinsicType xmm1( y.load(j ) );
1996 IntrinsicType xmm2( y.load(j+
IT::size ) );
1997 IntrinsicType xmm3( y.load(j+
IT::size*2UL) );
1999 for(
size_t i=ibegin; i<iend; ++i ) {
2000 const IntrinsicType x1(
set( x[i] ) );
2001 xmm1 = xmm1 - x1 * A.load(i,j );
2002 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size );
2003 xmm3 = xmm3 - x1 * A.load(i,j+
IT::size*2UL);
2006 y.store( j , xmm1 );
// --- 2 accumulators: columns j .. j+2*IT::size ---
2013 const size_t ibegin( ( IsLower<MT1>::value )
2014 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2016 const size_t iend( ( IsUpper<MT1>::value )
2017 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2021 IntrinsicType xmm1( y.load(j ) );
2022 IntrinsicType xmm2( y.load(j+
IT::size) );
2024 for(
size_t i=ibegin; i<iend; ++i ) {
2025 const IntrinsicType x1(
set( x[i] ) );
2026 xmm1 = xmm1 - x1 * A.load(i,j );
2027 xmm2 = xmm2 - x1 * A.load(i,j+
IT::size);
2030 y.store( j , xmm1 );
// --- 1 accumulator: columns j .. j+IT::size ---
2036 const size_t ibegin( ( IsLower<MT1>::value )
2037 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2039 const size_t iend( ( IsUpper<MT1>::value )
2040 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2044 IntrinsicType xmm1( y.load(j) );
2046 for(
size_t i=ibegin; i<iend; ++i ) {
2047 xmm1 = xmm1 -
set( x[i] ) * A.load(i,j);
// selectLargeSubAssignKernel, overload 1: chosen when UseVectorizedDefaultKernel
// does not hold (DisableIf). Simply forwards to the scalar default kernel.
2070 template<
typename VT1
2073 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2074 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2076 selectDefaultSubAssignKernel( y, x, A );
// selectLargeSubAssignKernel, overload 2: chosen when UseVectorizedDefaultKernel
// holds (EnableIf). Blocked, intrinsics-based kernel for y -= x * A.
// The matrix is walked in tiles of jblock columns by iblock rows. Within a tile
// the partial products of rows [ii,iend) are summed in registers (with `+`) and
// the sum is then subtracted from y via y.store( j, y.load(j) - xmm... ).
2096 template<
typename VT1
2099 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
2100 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2102 typedef IntrinsicTrait<ElementType> IT;
2104 const size_t M( A.rows() );
2105 const size_t N( A.columns() );
// Tile sizes: jblock is 32768 bytes worth of elements; iblock is 8 rows when the
// matrix has fewer than jblock columns and 4 rows otherwise.
2107 const size_t jblock( 32768UL /
sizeof( ElementType ) );
2108 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
2112 for(
size_t jj=0U; jj<N; jj+=jblock ) {
2113 for(
size_t ii=0UL; ii<M; ii+=iblock )
// Tile bounds, clamped to the matrix and narrowed for triangular matrices.
2115 const size_t iend(
min( ii+iblock, M ) );
2116 const size_t jtmp(
min( jj+jblock, N ) );
2117 const size_t jend( ( IsLower<MT1>::value )
2118 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is masked with size_t(-IT::size), which
// rounds it down to a multiple of IT::size when IT::size is a power of two.
2121 size_t j( ( IsUpper<MT1>::value )
2122 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- 8 accumulators ---
2127 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2129 for(
size_t i=ii; i<iend; ++i ) {
2130 const IntrinsicType x1(
set( x[i] ) );
2131 xmm1 = xmm1 + x1 * A.load(i,j );
2132 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2133 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2134 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2135 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
2136 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
2137 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
2138 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
2141 y.store( j , y.load(j ) - xmm1 );
// --- 4 accumulators ---
2153 IntrinsicType xmm1, xmm2, xmm3, xmm4;
2155 for(
size_t i=ii; i<iend; ++i ) {
2156 const IntrinsicType x1(
set( x[i] ) );
2157 xmm1 = xmm1 + x1 * A.load(i,j );
2158 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2159 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2160 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
2163 y.store( j , y.load(j ) - xmm1 );
// --- 3 accumulators ---
2171 IntrinsicType xmm1, xmm2, xmm3;
2173 for(
size_t i=ii; i<iend; ++i ) {
2174 const IntrinsicType x1(
set( x[i] ) );
2175 xmm1 = xmm1 + x1 * A.load(i,j );
2176 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
2177 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
2180 y.store( j , y.load(j ) - xmm1 );
// --- 2 accumulators ---
2187 IntrinsicType xmm1, xmm2;
2189 for(
size_t i=ii; i<iend; ++i ) {
2190 const IntrinsicType x1(
set( x[i] ) );
2191 xmm1 = xmm1 + x1 * A.load(i,j );
2192 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
2195 y.store( j , y.load(j ) - xmm1 );
// --- 1 accumulator ---
2203 for(
size_t i=ii; i<iend; ++i ) {
2204 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2207 y.store( j, y.load(j) - xmm1 );
// selectBlasSubAssignKernel overload set for y -= x * A. Exactly one overload is
// enabled per type triple through the Use*Kernel predicates.
//
// Fallback (UseDefaultKernel): no BLAS routine applies, forward to the large kernel.
2229 template<
typename VT1
2232 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
2233 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2235 selectLargeSubAssignKernel( y, x, A );
// Single precision: for triangular matrices strmv operates on `tmp` rather than on
// y; the sgemv call uses scalars -1.0F and 1.0F.
// NOTE(review): the declaration of `tmp`, the statement that combines it with y
// and the branch separating the trmv and gemv calls are not part of this excerpt.
2255 template<
typename VT1
2258 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
2259 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2261 if( IsTriangular<MT1>::value ) {
2263 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2267 sgemv( y, x, A, -1.0F, 1.0F );
// Double precision: same structure with dtrmv / dgemv and scalars -1.0 and 1.0.
2289 template<
typename VT1
2292 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
2293 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2295 if( IsTriangular<MT1>::value ) {
2297 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2301 dgemv( y, x, A, -1.0, 1.0 );
// Single precision complex: ctrmv / cgemv with scalars (-1,0) and (1,0).
2323 template<
typename VT1
2326 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2327 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2329 if( IsTriangular<MT1>::value ) {
2331 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2335 cgemv( y, x, A, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double precision complex: ztrmv / zgemv with scalars (-1,0) and (1,0).
2357 template<
typename VT1
2360 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2361 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
2363 if( IsTriangular<MT1>::value ) {
2365 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2369 zgemv( y, x, A, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Fragment of a compound-assignment overload whose signature is not part of this
// excerpt. The expression is first evaluated (through serial()) into a temporary
// of type ResultType; how the temporary is then used is not visible here.
// NOTE(review): by position (after the subtraction kernels) this is presumably the
// multiplication assignment -- confirm against the full source.
2393 template<
typename VT1 >
2404 const ResultType tmp(
serial( rhs ) );
// Friend function enabled only when UseSMPAssign<VT1> holds (function name and
// parameters are not part of this excerpt; presumably the SMP assignment).
// Visible: separate early-out checks for a matrix operand with zero rows and with
// zero columns; the branch bodies are not visible here.
2429 template<
typename VT1 >
2430 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2437 if( rhs.mat_.rows() == 0UL ) {
2441 else if( rhs.mat_.columns() == 0UL ) {
// Friend function enabled only when UseSMPAssign<VT1> holds (function name and
// parameters are not part of this excerpt).
// Visible: the expression is evaluated into a temporary of type ResultType; unlike
// the non-SMP counterpart above, `rhs` is not wrapped in serial() here.
2473 template<
typename VT1 >
2474 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2485 const ResultType tmp( rhs );
// Friend function enabled only when UseSMPAssign<VT1> holds (function name and
// parameters are not part of this excerpt; presumably the SMP addition assignment).
// Visible: a single early-out check for a matrix operand with zero rows or zero
// columns.
2506 template<
typename VT1 >
2507 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2514 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Friend function enabled only when UseSMPAssign<VT1> holds (function name and
// parameters are not part of this excerpt; presumably the SMP subtraction
// assignment).
// Visible: a single early-out check for a matrix operand with zero rows or zero
// columns.
2550 template<
typename VT1 >
2551 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2558 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
// Friend function enabled only when UseSMPAssign<VT1> holds (function name and
// parameters are not part of this excerpt; presumably the SMP multiplication
// assignment).
// Visible: the expression is evaluated into a temporary of type ResultType.
2594 template<
typename VT1 >
2595 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2606 const ResultType tmp( rhs );
// Header fragment of a class for the scaled expression
// DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >.
// It publicly derives from DenseVector of that exact type (passing it as the first
// template argument) and privately from VecScalarMultExpr and Computation.
// NOTE(review): the class-name line and the MT / ST template parameter lines are
// not part of this excerpt.
2645 template<
typename VT
2649 :
public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
2650 ,
private VecScalarMultExpr
2651 ,
private Computation
// Shorthand for the wrapped vector/matrix multiplication expression.
2655 typedef TDVecDMatMultExpr<VT,MT> VMM;
// Compile-time flag: true if the vector operand VT is a computation or
// requires an intermediate evaluation.
2667 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
// Compile-time flag: true if the matrix operand MT is a computation whose
// element type MET equals VET and is BLAS compatible, or if MT requires an
// intermediate evaluation.
2672 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2673 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Helper trait used to enable the SMP assignment overloads.
// Its value is true whenever either operand needs an intermediate evaluation;
// the template parameter T1 does not take part in the visible condition.
2681 template<
typename T1 >
2682 struct UseSMPAssign {
2683 enum { value = ( evaluateVector || evaluateMatrix ) };
// Helper trait for selecting the single-precision BLAS kernel.
// T1 is the target vector type, T2 the vector operand, T3 the matrix operand
// and T4 the scalar type (as used by the selectBlas*Kernel overloads).
2692 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2693 struct UseSinglePrecisionKernel {
// Required: mutable data access on the target and const data access on both
// operands, ...
2695 HasMutableDataAccess<T1>::value &&
2696 HasConstDataAccess<T2>::value &&
2697 HasConstDataAccess<T3>::value &&
// ... a non-diagonal matrix, all three types vectorizable, ...
2698 !IsDiagonal<T3>::value &&
2699 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// ... float element types throughout, ...
2700 IsFloat<typename T1::ElementType>::value &&
2701 IsFloat<typename T2::ElementType>::value &&
2702 IsFloat<typename T3::ElementType>::value &&
// ... and a non-complex scalar type.
2703 !IsComplex<T4>::value };
// Helper trait for selecting the double-precision BLAS kernel.
// T1 is the target vector type, T2 the vector operand, T3 the matrix operand
// and T4 the scalar type (as used by the selectBlas*Kernel overloads).
2712 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2713 struct UseDoublePrecisionKernel {
// Required: mutable data access on the target and const data access on both
// operands, ...
2715 HasMutableDataAccess<T1>::value &&
2716 HasConstDataAccess<T2>::value &&
2717 HasConstDataAccess<T3>::value &&
// ... a non-diagonal matrix, all three types vectorizable, ...
2718 !IsDiagonal<T3>::value &&
2719 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// ... double element types throughout, ...
2720 IsDouble<typename T1::ElementType>::value &&
2721 IsDouble<typename T2::ElementType>::value &&
2722 IsDouble<typename T3::ElementType>::value &&
// ... and a non-complex scalar type.
2723 !IsComplex<T4>::value };
// Helper trait for selecting the single-precision complex BLAS kernel.
// Unlike the real-valued traits it takes no scalar type parameter.
2732 template<
typename T1,
typename T2,
typename T3 >
2733 struct UseSinglePrecisionComplexKernel {
// Element type all three operands must share.
2734 typedef complex<float> Type;
// Required: mutable data access on the target and const data access on both
// operands, ...
2736 HasMutableDataAccess<T1>::value &&
2737 HasConstDataAccess<T2>::value &&
2738 HasConstDataAccess<T3>::value &&
// ... a non-diagonal matrix, all three types vectorizable, ...
2739 !IsDiagonal<T3>::value &&
2740 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// ... and complex<float> as element type of all three.
2741 IsSame<typename T1::ElementType,Type>::value &&
2742 IsSame<typename T2::ElementType,Type>::value &&
2743 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for selecting the double-precision complex BLAS kernel.
// Unlike the real-valued traits it takes no scalar type parameter.
2752 template<
typename T1,
typename T2,
typename T3 >
2753 struct UseDoublePrecisionComplexKernel {
// Element type all three operands must share.
2754 typedef complex<double> Type;
// Required: mutable data access on the target and const data access on both
// operands, ...
2756 HasMutableDataAccess<T1>::value &&
2757 HasConstDataAccess<T2>::value &&
2758 HasConstDataAccess<T3>::value &&
// ... a non-diagonal matrix, all three types vectorizable, ...
2759 !IsDiagonal<T3>::value &&
2760 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// ... and complex<double> as element type of all three.
2761 IsSame<typename T1::ElementType,Type>::value &&
2762 IsSame<typename T2::ElementType,Type>::value &&
2763 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for selecting the non-BLAS (default) kernel.
// True if BLAS mode is switched off or if none of the four BLAS kernel traits
// applies to the given type combination.
2771 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2772 struct UseDefaultKernel {
2773 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2774 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2775 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2776 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Helper trait for selecting the vectorized (intrinsics-based) default kernel.
// T1 is the target vector type, T2 the vector operand, T3 the matrix operand
// and T4 the scalar type.
2785 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2786 struct UseVectorizedDefaultKernel {
// Required: a non-diagonal matrix and vectorizable operand types, ...
2787 enum { value = !IsDiagonal<T3>::value &&
2788 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// ... one common element type for target, operands and scalar, ...
2789 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2790 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2791 IsSame<typename T1::ElementType,T4>::value &&
// ... and intrinsic support for addition and multiplication of that type.
2792 IntrinsicTrait<typename T1::ElementType>::addition &&
2793 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression: the scaled vector/matrix product.
2799 typedef DVecScalarMultExpr<VMM,ST,true>
This;
// Result type: multiplication trait of RES and the scalar type ST.
2800 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic (SIMD) type associated with ElementType.
2803 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left-hand side operand of the scaling is the (const) vector/matrix product.
2808 typedef const TDVecDMatMultExpr<VT,MT>
LeftOperand;
// Type used for the vector operand inside the kernels: VRT if the vector has
// to be evaluated (evaluateVector), VCT otherwise.
2814 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
LT;
// Type used for the matrix operand inside the kernels: MRT if the matrix has
// to be evaluated (evaluateMatrix), MCT otherwise.
2817 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
RT;
// Compile-time flag for vectorization: requires a non-diagonal matrix,
// vectorizable operands, identical element types VET, MET and scalar type ST,
// and intrinsic addition/multiplication support for VET.
2822 enum { vectorizable = !IsDiagonal<MT>::value &&
2823 VT::vectorizable && MT::vectorizable &&
2824 IsSame<VET,MET>::value &&
2825 IsSame<VET,ST>::value &&
2826 IntrinsicTrait<VET>::addition &&
2827 IntrinsicTrait<VET>::multiplication };
// Compile-time flag for SMP assignment: neither operand needs an intermediate
// evaluation and both operand types are themselves SMP-assignable.
2830 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2831 !evaluateMatrix && MT::smpAssignable };
// Constructor taking the vector/matrix product expression and the scalar it
// is scaled with.
2840 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
// Subscript operator: returns the element of the wrapped product at the given
// index multiplied by the scalar.
2852 inline ReturnType
operator[](
size_t index )
const {
2854 return vector_[index] * scalar_;
// Returns the size of the expression, which is the size of the wrapped
// vector/matrix product.
2863 inline size_t size()
const {
2864 return vector_.size();
// Aliasing query: forwards the given address to canAlias() of the wrapped
// vector/matrix product; the scalar is not consulted.
2894 template<
typename T >
2895 inline bool canAlias(
const T* alias )
const {
2896 return vector_.canAlias( alias );
// Aliasing query: forwards the given address to isAliased() of the wrapped
// vector/matrix product; the scalar is not consulted.
2906 template<
typename T >
2907 inline bool isAliased(
const T* alias )
const {
2908 return vector_.isAliased( alias );
// The alignment query is answered by the wrapped vector/matrix product.
2918 return vector_.isAligned();
// Fetch the matrix operand of the wrapped vector/matrix product.
2928 typename VMM::RightOperand A( vector_.rightOperand() );
// One of the or-ed conditions: the matrix is a computation that is not
// evaluated into a temporary.
2930 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Left-hand side operand: the vector/matrix multiplication expression.
2938 LeftOperand vector_;
// Right-hand side operand: the scalar of the scaling.
2939 RightOperand scalar_;
// Assignment of the scaled vector/matrix product to a dense (transpose) vector.
2954 template<
typename VT1 >
2955 friend inline void assign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the vector and matrix operands of the wrapped product.
2961 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2962 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// A matrix without rows and a matrix without columns are treated as two
// separate special cases.
2964 if( right.rows() == 0UL ) {
2968 else if( right.columns() == 0UL ) {
// General case: dispatch to the kernel selection with the scalar passed along.
2980 DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel selection for the assignment y = x * A * scalar.
2995 template<
typename VT1
2999 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Diagonal matrices and matrix computations that are not evaluated into a
// temporary are among the conditions for the small kernel
// (presumably the BLAS kernel call below is the alternative branch).
3001 if( ( IsDiagonal<MT1>::value ) ||
3002 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3004 selectSmallAssignKernel( y, x, A, scalar );
3006 selectBlasAssignKernel( y, x, A, scalar );
// Default (scalar, non-vectorized) kernel for the assignment y = x * A * scalar.
// The loop bounds are narrowed at compile time for lower/upper/diagonal matrices.
3024 template<
typename VT1
3028 static inline void selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// M = number of matrix rows, N = number of matrix columns.
3030 const size_t M( A.rows() );
3031 const size_t N( A.columns() );
3033 if( IsStrictlyUpper<MT1>::value ) {
// For matrices that are not lower, y is initialized from the first matrix row
// (starting at column 1 for strictly upper matrices).
3037 if( !IsLower<MT1>::value )
3039 for(
size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<N; ++j ) {
3040 y[j] = x[0UL] * A(0UL,j);
// Remaining rows; iteration starts at row 0 only for lower matrices that are
// not strictly lower, otherwise at row 1.
3044 for(
size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
// Diagonal matrices: each element is a single product, scaled immediately.
3046 if( IsDiagonal<MT1>::value )
3048 y[i] = x[i] * A(i,i) * scalar;
// Column range of row i: for upper matrices it begins at i (i+1 if strictly
// upper), for lower matrices it ends at i (i-1 if strictly lower).
3052 const size_t jbegin( ( IsUpper<MT1>::value )
3053 ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3055 const size_t jend( ( IsLower<MT1>::value )
3056 ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
// jnum columns are processed; jpos marks the end of the part that can be
// handled in pairs (jnum rounded down to an even number).
3060 const size_t jnum( jend - jbegin );
3061 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Two-way unrolled accumulation.
3063 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
3064 y[j ] += x[i] * A(i,j );
3065 y[j+1UL] += x[i] * A(i,j+1UL);
// Leftover column of an odd-sized range.
3068 y[jpos] += x[i] * A(i,jpos);
// For lower matrices the element at jend is assigned rather than accumulated.
3070 if( IsLower<MT1>::value ) {
3071 y[jend] = x[i] * A(i,jend);
3076 if( IsStrictlyLower<MT1>::value ) {
// Final pass over y for non-diagonal matrices; the index range excludes the
// first element for strictly upper and the last for strictly lower matrices.
// NOTE(review): presumably this is where the scalar is applied - confirm.
3080 if( !IsDiagonal<MT1>::value )
3082 const size_t iend( IsStrictlyLower<MT1>::value ? N-1UL : N );
3083 for(
size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<iend; ++j ) {
// Small-matrix assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
3104 template<
typename VT1
3108 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3109 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3111 selectDefaultAssignKernel( y, x, A, scalar );
// Small-matrix assignment kernel, vectorized overload (y = x * A * scalar).
// Chosen when UseVectorizedDefaultKernel applies (EnableIf). Columns are
// processed in panels of 8, 4, 3, 2 and 1 intrinsic vectors; each panel
// accumulates over the rows and stores the accumulators times 'factor'.
3129 template<
typename VT1
3133 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3134 selectSmallAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3136 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
3138 const size_t M( A.rows() );
3139 const size_t N( A.columns() );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
3141 const IntrinsicType factor(
set( scalar ) );
// --- Panel of eight intrinsic vectors ---
// Row range: starts at j (j+1 if strictly lower) for lower matrices; ends at
// min(j+IT::size*8, M), reduced by one if strictly upper, for upper matrices.
3147 const size_t ibegin( ( IsLower<MT1>::value )
3148 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3150 const size_t iend( ( IsUpper<MT1>::value )
3151 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3155 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
// Accumulate x[i] times the eight consecutive intrinsic vectors of row i.
3157 for(
size_t i=ibegin; i<iend; ++i ) {
3158 const IntrinsicType x1(
set( x[i] ) );
3159 xmm1 = xmm1 + x1 * A.load(i,j );
3160 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3161 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3162 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3163 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3164 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3165 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3166 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Store the scaled accumulators; y is overwritten (plain assignment).
3169 y.store( j , xmm1*factor );
3170 y.store( j+
IT::size , xmm2*factor );
3171 y.store( j+
IT::size*2UL, xmm3*factor );
3172 y.store( j+
IT::size*3UL, xmm4*factor );
3173 y.store( j+
IT::size*4UL, xmm5*factor );
3174 y.store( j+
IT::size*5UL, xmm6*factor );
3175 y.store( j+
IT::size*6UL, xmm7*factor );
3176 y.store( j+
IT::size*7UL, xmm8*factor );
// --- Panel of four intrinsic vectors ---
3181 const size_t ibegin( ( IsLower<MT1>::value )
3182 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3184 const size_t iend( ( IsUpper<MT1>::value )
3185 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3189 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3191 for(
size_t i=ibegin; i<iend; ++i ) {
3192 const IntrinsicType x1(
set( x[i] ) );
3193 xmm1 = xmm1 + x1 * A.load(i,j );
3194 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3195 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3196 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3199 y.store( j , xmm1*factor );
3200 y.store( j+
IT::size , xmm2*factor );
3201 y.store( j+
IT::size*2UL, xmm3*factor );
3202 y.store( j+
IT::size*3UL, xmm4*factor );
// --- Panel of three intrinsic vectors ---
3207 const size_t ibegin( ( IsLower<MT1>::value )
3208 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3210 const size_t iend( ( IsUpper<MT1>::value )
3211 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3215 IntrinsicType xmm1, xmm2, xmm3;
3217 for(
size_t i=ibegin; i<iend; ++i ) {
3218 const IntrinsicType x1(
set( x[i] ) );
3219 xmm1 = xmm1 + x1 * A.load(i,j );
3220 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3221 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3224 y.store( j , xmm1*factor );
3225 y.store( j+
IT::size , xmm2*factor );
3226 y.store( j+
IT::size*2UL, xmm3*factor );
// --- Panel of two intrinsic vectors ---
3231 const size_t ibegin( ( IsLower<MT1>::value )
3232 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3234 const size_t iend( ( IsUpper<MT1>::value )
3235 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3239 IntrinsicType xmm1, xmm2;
3241 for(
size_t i=ibegin; i<iend; ++i ) {
3242 const IntrinsicType x1(
set( x[i] ) );
3243 xmm1 = xmm1 + x1 * A.load(i,j );
3244 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3247 y.store( j , xmm1*factor );
3248 y.store( j+
IT::size, xmm2*factor );
// --- Single intrinsic vector ---
3253 const size_t ibegin( ( IsLower<MT1>::value )
3254 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3256 const size_t iend( ( IsUpper<MT1>::value )
3257 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3263 for(
size_t i=ibegin; i<iend; ++i ) {
3264 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3267 y.store( j, xmm1*factor );
// Large-matrix assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
3286 template<
typename VT1
3290 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3291 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3293 selectDefaultAssignKernel( y, x, A, scalar );
// Large-matrix assignment kernel, vectorized overload (y = x * A * scalar).
// Chosen when UseVectorizedDefaultKernel applies (EnableIf). The matrix is
// traversed in blocks of iblock rows by jblock columns; within a block the
// columns are processed in panels of 8, 4, 3, 2 and 1 intrinsic vectors and
// each panel's scaled result is added to the current content of y.
3311 template<
typename VT1
3315 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3316 selectLargeAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3318 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
3320 const size_t M( A.rows() );
3321 const size_t N( A.columns() );
// Column block: as many elements as fit into 32768 bytes.
// Row block: 8 rows if all columns fit into one column block, 4 otherwise.
3323 const size_t jblock( 32768UL /
sizeof( ElementType ) );
3324 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
3326 const IntrinsicType factor(
set( scalar ) );
// Blocked traversal: outer loop over column blocks, inner loop over row blocks.
3332 for(
size_t jj=0U; jj<N; jj+=jblock ) {
3333 for(
size_t ii=0UL; ii<M; ii+=iblock )
// End of the current row block and (tentative) end of the current column block.
3335 const size_t iend(
min( ii+iblock, M ) );
3336 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally capped at iend
// (iend-1 if strictly lower).
3337 const size_t jend( ( IsLower<MT1>::value )
3338 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is the larger of jj and ii (ii+1 if
// strictly upper) masked with size_t(-IT::size), i.e. rounded down to a
// multiple of IT::size (assuming IT::size is a power of two).
3341 size_t j( ( IsUpper<MT1>::value )
3342 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- Panel of eight intrinsic vectors ---
3347 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3349 for(
size_t i=ii; i<iend; ++i ) {
3350 const IntrinsicType x1(
set( x[i] ) );
3351 xmm1 = xmm1 + x1 * A.load(i,j );
3352 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3353 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3354 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3355 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3356 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3357 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3358 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// The block's scaled partial result is added to what y already holds, since
// each row block contributes to the same y elements.
3361 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of four intrinsic vectors ---
3373 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3375 for(
size_t i=ii; i<iend; ++i ) {
3376 const IntrinsicType x1(
set( x[i] ) );
3377 xmm1 = xmm1 + x1 * A.load(i,j );
3378 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3379 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3380 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3383 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of three intrinsic vectors ---
3391 IntrinsicType xmm1, xmm2, xmm3;
3393 for(
size_t i=ii; i<iend; ++i ) {
3394 const IntrinsicType x1(
set( x[i] ) );
3395 xmm1 = xmm1 + x1 * A.load(i,j );
3396 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3397 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3400 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of two intrinsic vectors ---
3407 IntrinsicType xmm1, xmm2;
3409 for(
size_t i=ii; i<iend; ++i ) {
3410 const IntrinsicType x1(
set( x[i] ) );
3411 xmm1 = xmm1 + x1 * A.load(i,j );
3412 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3415 y.store( j , y.load(j ) + xmm1*factor );
// --- Single intrinsic vector ---
3423 for(
size_t i=ii; i<iend; ++i ) {
3424 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3427 y.store( j, y.load(j) + xmm1*factor );
// BLAS assignment kernel, fallback overload.
// Chosen when UseDefaultKernel applies (no BLAS kernel usable); forwards to the
// large-matrix kernel.
3447 template<
typename VT1
3451 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3452 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3454 selectLargeAssignKernel( y, x, A, scalar );
// BLAS assignment kernel, single-precision overload.
// Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is true.
3473 template<
typename VT1
3477 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3478 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied directly to y, with the triangle flag
// derived from IsLower<MT1>.
3480 if( IsTriangular<MT1>::value ) {
3482 strmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with scaling arguments 'scalar' and 0.0F
// (presumably y = scalar*x*A, discarding the old y; confirm against the wrapper).
3485 sgemv( y, x, A, scalar, 0.0F );
// BLAS assignment kernel, double-precision overload.
// Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is true.
3506 template<
typename VT1
3510 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3511 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied directly to y, with the triangle flag
// derived from IsLower<MT1>.
3513 if( IsTriangular<MT1>::value ) {
3515 dtrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with scaling arguments 'scalar' and 0.0
// (presumably y = scalar*x*A, discarding the old y; confirm against the wrapper).
3518 dgemv( y, x, A, scalar, 0.0 );
// BLAS assignment kernel, single-precision complex overload.
// Enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is true.
3539 template<
typename VT1
3543 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3544 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied directly to y, with the triangle flag
// derived from IsLower<MT1>.
3546 if( IsTriangular<MT1>::value ) {
3548 ctrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call; the scalar is passed as a complex value with zero imaginary
// part, the second scaling argument is complex zero.
3551 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS assignment kernel, double-precision complex overload.
// Enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is true.
3572 template<
typename VT1
3576 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3577 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv is applied directly to y, with the triangle flag
// derived from IsLower<MT1>.
3579 if( IsTriangular<MT1>::value ) {
3581 ztrmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call; the scalar is passed as a complex value with zero imaginary
// part, the second scaling argument is complex zero.
3584 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of the scaled vector/matrix product to a sparse (transpose) vector.
3602 template<
typename VT1 >
3603 friend inline void assign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// The expression is first evaluated serially into a temporary of type ResultType.
3613 const ResultType tmp(
serial( rhs ) );
// Addition assignment of the scaled vector/matrix product to a dense
// (transpose) vector.
3630 template<
typename VT1 >
3631 friend inline void addAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the vector and matrix operands of the wrapped product.
3637 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3638 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: a matrix without rows or without columns
// (presumably nothing has to be added - confirm).
3640 if( right.rows() == 0UL || right.columns() == 0UL ) {
// General case: dispatch to the kernel selection with the scalar passed along.
3652 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel selection for the addition assignment y += x * A * scalar.
3667 template<
typename VT1
3671 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Diagonal matrices and matrix computations that are not evaluated into a
// temporary are among the conditions for the small kernel
// (presumably the BLAS kernel call below is the alternative branch).
3673 if( ( IsDiagonal<MT1>::value ) ||
3674 ( IsComputation<MT>::value && !evaluateMatrix ) ||
3676 selectSmallAddAssignKernel( y, x, A, scalar );
3678 selectBlasAddAssignKernel( y, x, A, scalar );
// Default kernel for the addition assignment: rebuilds the expression
// x * A * scalar and hands it to y.addAssign().
3696 template<
typename VT1
3700 static inline void selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3702 y.addAssign( x * A * scalar );
// Small-matrix addition assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
3720 template<
typename VT1
3724 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3725 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3727 selectDefaultAddAssignKernel( y, x, A, scalar );
// Small-matrix addition assignment kernel, vectorized overload
// (y += x * A * scalar). Chosen when UseVectorizedDefaultKernel applies
// (EnableIf). Columns are processed in panels of 8, 4, 3, 2 and 1 intrinsic
// vectors; each panel's accumulators are scaled by 'factor' and added to y.
3746 template<
typename VT1
3750 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3751 selectSmallAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3753 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
3755 const size_t M( A.rows() );
3756 const size_t N( A.columns() );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
3758 const IntrinsicType factor(
set( scalar ) );
// --- Panel of eight intrinsic vectors ---
// Row range: starts at j (j+1 if strictly lower) for lower matrices; ends at
// min(j+IT::size*8, M), reduced by one if strictly upper, for upper matrices.
3764 const size_t ibegin( ( IsLower<MT1>::value )
3765 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3767 const size_t iend( ( IsUpper<MT1>::value )
3768 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3772 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3774 for(
size_t i=ibegin; i<iend; ++i ) {
3775 const IntrinsicType x1(
set( x[i] ) );
3776 xmm1 = xmm1 + x1 * A.load(i,j );
3777 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3778 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3779 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3780 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3781 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3782 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3783 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Add the scaled accumulator to the existing content of y.
3786 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of four intrinsic vectors ---
3798 const size_t ibegin( ( IsLower<MT1>::value )
3799 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3801 const size_t iend( ( IsUpper<MT1>::value )
3802 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3806 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3808 for(
size_t i=ibegin; i<iend; ++i ) {
3809 const IntrinsicType x1(
set( x[i] ) );
3810 xmm1 = xmm1 + x1 * A.load(i,j );
3811 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3812 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3813 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3816 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of three intrinsic vectors ---
3824 const size_t ibegin( ( IsLower<MT1>::value )
3825 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3827 const size_t iend( ( IsUpper<MT1>::value )
3828 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3832 IntrinsicType xmm1, xmm2, xmm3;
3834 for(
size_t i=ibegin; i<iend; ++i ) {
3835 const IntrinsicType x1(
set( x[i] ) );
3836 xmm1 = xmm1 + x1 * A.load(i,j );
3837 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3838 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3841 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of two intrinsic vectors ---
3848 const size_t ibegin( ( IsLower<MT1>::value )
3849 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3851 const size_t iend( ( IsUpper<MT1>::value )
3852 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3856 IntrinsicType xmm1, xmm2;
3858 for(
size_t i=ibegin; i<iend; ++i ) {
3859 const IntrinsicType x1(
set( x[i] ) );
3860 xmm1 = xmm1 + x1 * A.load(i,j );
3861 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
3864 y.store( j , y.load(j ) + xmm1*factor );
// --- Single intrinsic vector ---
3870 const size_t ibegin( ( IsLower<MT1>::value )
3871 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3873 const size_t iend( ( IsUpper<MT1>::value )
3874 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3880 for(
size_t i=ibegin; i<iend; ++i ) {
3881 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3884 y.store( j, y.load(j) + xmm1*factor );
// Large-matrix addition assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
3903 template<
typename VT1
3907 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3908 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3910 selectDefaultAddAssignKernel( y, x, A, scalar );
// Large-matrix addition assignment kernel, vectorized overload
// (y += x * A * scalar). Chosen when UseVectorizedDefaultKernel applies
// (EnableIf). The matrix is traversed in blocks of iblock rows by jblock
// columns; within a block the columns are processed in panels of 8, 4, 3, 2
// and 1 intrinsic vectors and each scaled partial result is added to y.
3929 template<
typename VT1
3933 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3934 selectLargeAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3936 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
3938 const size_t M( A.rows() );
3939 const size_t N( A.columns() );
// Column block: as many elements as fit into 32768 bytes.
// Row block: 8 rows if all columns fit into one column block, 4 otherwise.
3941 const size_t jblock( 32768UL /
sizeof( ElementType ) );
3942 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
3944 const IntrinsicType factor(
set( scalar ) );
// Blocked traversal: outer loop over column blocks, inner loop over row blocks.
3948 for(
size_t jj=0U; jj<N; jj+=jblock ) {
3949 for(
size_t ii=0UL; ii<M; ii+=iblock )
// End of the current row block and (tentative) end of the current column block.
3951 const size_t iend(
min( ii+iblock, M ) );
3952 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally capped at iend
// (iend-1 if strictly lower).
3953 const size_t jend( ( IsLower<MT1>::value )
3954 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is the larger of jj and ii (ii+1 if
// strictly upper) masked with size_t(-IT::size), i.e. rounded down to a
// multiple of IT::size (assuming IT::size is a power of two).
3957 size_t j( ( IsUpper<MT1>::value )
3958 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- Panel of eight intrinsic vectors ---
3963 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3965 for(
size_t i=ii; i<iend; ++i ) {
3966 const IntrinsicType x1(
set( x[i] ) );
3967 xmm1 = xmm1 + x1 * A.load(i,j );
3968 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3969 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3970 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3971 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
3972 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
3973 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
3974 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Add the scaled accumulator to the existing content of y.
3977 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of four intrinsic vectors ---
3989 IntrinsicType xmm1, xmm2, xmm3, xmm4;
3991 for(
size_t i=ii; i<iend; ++i ) {
3992 const IntrinsicType x1(
set( x[i] ) );
3993 xmm1 = xmm1 + x1 * A.load(i,j );
3994 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
3995 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
3996 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
3999 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of three intrinsic vectors ---
4007 IntrinsicType xmm1, xmm2, xmm3;
4009 for(
size_t i=ii; i<iend; ++i ) {
4010 const IntrinsicType x1(
set( x[i] ) );
4011 xmm1 = xmm1 + x1 * A.load(i,j );
4012 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4013 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4016 y.store( j , y.load(j ) + xmm1*factor );
// --- Panel of two intrinsic vectors ---
4023 IntrinsicType xmm1, xmm2;
4025 for(
size_t i=ii; i<iend; ++i ) {
4026 const IntrinsicType x1(
set( x[i] ) );
4027 xmm1 = xmm1 + x1 * A.load(i,j );
4028 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
4031 y.store( j , y.load(j ) + xmm1*factor );
// --- Single intrinsic vector ---
4039 for(
size_t i=ii; i<iend; ++i ) {
4040 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
4043 y.store( j, y.load(j) + xmm1*factor );
// BLAS addition assignment kernel, fallback overload.
// Chosen when UseDefaultKernel applies (no BLAS kernel usable); forwards to the
// large-matrix kernel.
4064 template<
typename VT1
4068 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4069 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4071 selectLargeAddAssignKernel( y, x, A, scalar );
// BLAS addition assignment kernel, single-precision overload.
// Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is true.
4090 template<
typename VT1
4094 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4095 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4097 if( IsTriangular<MT1>::value ) {
4099 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with scaling arguments 'scalar' and 1.0F
// (presumably y = scalar*x*A + y; confirm against the sgemv wrapper).
4103 sgemv( y, x, A, scalar, 1.0F );
// BLAS addition assignment kernel, double-precision overload.
// Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is true.
4124 template<
typename VT1
4128 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4129 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4131 if( IsTriangular<MT1>::value ) {
4133 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with scaling arguments 'scalar' and 1.0
// (presumably y = scalar*x*A + y; confirm against the dgemv wrapper).
4137 dgemv( y, x, A, scalar, 1.0 );
// BLAS addition assignment kernel, single-precision complex overload.
// Enabled only when UseSinglePrecisionComplexKernel<VT1,VT2,MT1> is true.
4158 template<
typename VT1
4162 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4163 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4165 if( IsTriangular<MT1>::value ) {
4167 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call; the scalar is passed as a complex value with zero imaginary
// part, the second scaling argument is complex one.
4171 cgemv( y, x, A, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS addition assignment kernel, double-precision complex overload.
// Enabled only when UseDoublePrecisionComplexKernel<VT1,VT2,MT1> is true.
4192 template<
typename VT1
4196 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4197 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4199 if( IsTriangular<MT1>::value ) {
4201 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call; the scalar is passed as a complex value with zero imaginary
// part, the second scaling argument is complex one.
4205 zgemv( y, x, A, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment of the scaled vector/matrix product to a dense
// (transpose) vector.
4227 template<
typename VT1 >
4228 friend inline void subAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Unpack the vector and matrix operands of the wrapped product.
4234 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4235 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
// Special case: a matrix without rows or without columns
// (presumably nothing has to be subtracted - confirm).
4237 if( right.rows() == 0UL || right.columns() == 0UL ) {
// General case: dispatch to the kernel selection with the scalar passed along.
4249 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel selection for the subtraction assignment y -= x * A * scalar.
4264 template<
typename VT1
4268 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Diagonal matrices and matrix computations that are not evaluated into a
// temporary are among the conditions for the small kernel
// (presumably the BLAS kernel call below is the alternative branch).
4270 if( ( IsDiagonal<MT1>::value ) ||
4271 ( IsComputation<MT>::value && !evaluateMatrix ) ||
4273 selectSmallSubAssignKernel( y, x, A, scalar );
4275 selectBlasSubAssignKernel( y, x, A, scalar );
// Default kernel for the subtraction assignment: rebuilds the expression
// x * A * scalar and hands it to y.subAssign().
4293 template<
typename VT1
4297 static inline void selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4299 y.subAssign( x * A * scalar );
// Small-matrix subtraction assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
4317 template<
typename VT1
4321 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4322 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4324 selectDefaultSubAssignKernel( y, x, A, scalar );
// Small-matrix subtraction assignment kernel, vectorized overload
// (y -= x * A * scalar). Chosen when UseVectorizedDefaultKernel applies
// (EnableIf). Columns are processed in panels of 8, 4, 3, 2 and 1 intrinsic
// vectors; each panel's accumulators are scaled by 'factor' and subtracted
// from y.
4343 template<
typename VT1
4347 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4348 selectSmallSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4350 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
4352 const size_t M( A.rows() );
4353 const size_t N( A.columns() );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
4355 const IntrinsicType factor(
set( scalar ) );
// --- Panel of eight intrinsic vectors ---
// Row range: starts at j (j+1 if strictly lower) for lower matrices; ends at
// min(j+IT::size*8, M), reduced by one if strictly upper, for upper matrices.
4361 const size_t ibegin( ( IsLower<MT1>::value )
4362 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4364 const size_t iend( ( IsUpper<MT1>::value )
4365 ?(
min( j+
IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4369 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4371 for(
size_t i=ibegin; i<iend; ++i ) {
4372 const IntrinsicType x1(
set( x[i] ) );
4373 xmm1 = xmm1 + x1 * A.load(i,j );
4374 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4375 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4376 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4377 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
4378 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
4379 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
4380 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Subtract the scaled accumulator from the existing content of y.
4383 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of four intrinsic vectors ---
4395 const size_t ibegin( ( IsLower<MT1>::value )
4396 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4398 const size_t iend( ( IsUpper<MT1>::value )
4399 ?(
min( j+
IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4403 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4405 for(
size_t i=ibegin; i<iend; ++i ) {
4406 const IntrinsicType x1(
set( x[i] ) );
4407 xmm1 = xmm1 + x1 * A.load(i,j );
4408 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4409 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4410 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4413 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of three intrinsic vectors ---
4421 const size_t ibegin( ( IsLower<MT1>::value )
4422 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4424 const size_t iend( ( IsUpper<MT1>::value )
4425 ?(
min( j+
IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4429 IntrinsicType xmm1, xmm2, xmm3;
4431 for(
size_t i=ibegin; i<iend; ++i ) {
4432 const IntrinsicType x1(
set( x[i] ) );
4433 xmm1 = xmm1 + x1 * A.load(i,j );
4434 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4435 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4438 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of two intrinsic vectors ---
4445 const size_t ibegin( ( IsLower<MT1>::value )
4446 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4448 const size_t iend( ( IsUpper<MT1>::value )
4449 ?(
min( j+
IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4453 IntrinsicType xmm1, xmm2;
4455 for(
size_t i=ibegin; i<iend; ++i ) {
4456 const IntrinsicType x1(
set( x[i] ) );
4457 xmm1 = xmm1 + x1 * A.load(i,j );
4458 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
4461 y.store( j , y.load(j ) - xmm1*factor );
// --- Single intrinsic vector ---
4467 const size_t ibegin( ( IsLower<MT1>::value )
4468 ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4470 const size_t iend( ( IsUpper<MT1>::value )
4471 ?(
min( j+
IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4477 for(
size_t i=ibegin; i<iend; ++i ) {
4478 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
4481 y.store( j, y.load(j) - xmm1*factor );
// Large-matrix subtraction assignment kernel, non-vectorized overload.
// Chosen when UseVectorizedDefaultKernel does not apply (DisableIf); it simply
// forwards to the default kernel.
4500 template<
typename VT1
4504 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4505 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4507 selectDefaultSubAssignKernel( y, x, A, scalar );
// Large-matrix subtraction assignment kernel, vectorized overload
// (y -= x * A * scalar). Chosen when UseVectorizedDefaultKernel applies
// (EnableIf). The matrix is traversed in blocks of iblock rows by jblock
// columns; within a block the columns are processed in panels of 8, 4, 3, 2
// and 1 intrinsic vectors and each scaled partial result is subtracted from y.
4526 template<
typename VT1
4530 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4531 selectLargeSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4533 typedef IntrinsicTrait<ElementType> IT;
// M = number of matrix rows, N = number of matrix columns.
4535 const size_t M( A.rows() );
4536 const size_t N( A.columns() );
// Column block: as many elements as fit into 32768 bytes.
// Row block: 8 rows if all columns fit into one column block, 4 otherwise.
4538 const size_t jblock( 32768UL /
sizeof( ElementType ) );
4539 const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
// Intrinsic value built from the scalar via set()
// (presumably the scalar replicated into every lane - confirm).
4541 const IntrinsicType factor(
set( scalar ) );
// Blocked traversal: outer loop over column blocks, inner loop over row blocks.
4545 for(
size_t jj=0U; jj<N; jj+=jblock ) {
4546 for(
size_t ii=0UL; ii<M; ii+=iblock )
// End of the current row block and (tentative) end of the current column block.
4548 const size_t iend(
min( ii+iblock, M ) );
4549 const size_t jtmp(
min( jj+jblock, N ) );
// For lower matrices the column range is additionally capped at iend
// (iend-1 if strictly lower).
4550 const size_t jend( ( IsLower<MT1>::value )
4551 ?(
min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
// For upper matrices the first column is the larger of jj and ii (ii+1 if
// strictly upper) masked with size_t(-IT::size), i.e. rounded down to a
// multiple of IT::size (assuming IT::size is a power of two).
4554 size_t j( ( IsUpper<MT1>::value )
4555 ?(
max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) &
size_t(-
IT::size) ) )
// --- Panel of eight intrinsic vectors ---
4560 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4562 for(
size_t i=ii; i<iend; ++i ) {
4563 const IntrinsicType x1(
set( x[i] ) );
4564 xmm1 = xmm1 + x1 * A.load(i,j );
4565 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4566 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4567 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4568 xmm5 = xmm5 + x1 * A.load(i,j+
IT::size*4UL);
4569 xmm6 = xmm6 + x1 * A.load(i,j+
IT::size*5UL);
4570 xmm7 = xmm7 + x1 * A.load(i,j+
IT::size*6UL);
4571 xmm8 = xmm8 + x1 * A.load(i,j+
IT::size*7UL);
// Subtract the scaled accumulator from the existing content of y.
4574 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of four intrinsic vectors ---
4586 IntrinsicType xmm1, xmm2, xmm3, xmm4;
4588 for(
size_t i=ii; i<iend; ++i ) {
4589 const IntrinsicType x1(
set( x[i] ) );
4590 xmm1 = xmm1 + x1 * A.load(i,j );
4591 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4592 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4593 xmm4 = xmm4 + x1 * A.load(i,j+
IT::size*3UL);
4596 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of three intrinsic vectors ---
4604 IntrinsicType xmm1, xmm2, xmm3;
4606 for(
size_t i=ii; i<iend; ++i ) {
4607 const IntrinsicType x1(
set( x[i] ) );
4608 xmm1 = xmm1 + x1 * A.load(i,j );
4609 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size );
4610 xmm3 = xmm3 + x1 * A.load(i,j+
IT::size*2UL);
4613 y.store( j , y.load(j ) - xmm1*factor );
// --- Panel of two intrinsic vectors ---
4620 IntrinsicType xmm1, xmm2;
4622 for(
size_t i=ii; i<iend; ++i ) {
4623 const IntrinsicType x1(
set( x[i] ) );
4624 xmm1 = xmm1 + x1 * A.load(i,j );
4625 xmm2 = xmm2 + x1 * A.load(i,j+
IT::size);
4628 y.store( j , y.load(j ) - xmm1*factor );
// --- Single intrinsic vector ---
4636 for(
size_t i=ii; i<iend; ++i ) {
4637 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
4640 y.store( j, y.load(j) - xmm1*factor );
// BLAS subtraction assignment kernel, fallback overload.
// Chosen when UseDefaultKernel applies (no BLAS kernel usable); forwards to the
// large-matrix kernel.
4661 template<
typename VT1
4665 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4666 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4668 selectLargeSubAssignKernel( y, x, A, scalar );
// BLAS subtraction assignment kernel, single-precision overload.
// Enabled only when UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> is true.
4687 template<
typename VT1
4691 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4692 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4694 if( IsTriangular<MT1>::value ) {
4696 strmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with the negated scalar and 1.0F as scaling arguments
// (presumably y = y - scalar*x*A; confirm against the sgemv wrapper).
4700 sgemv( y, x, A, -scalar, 1.0F );
// BLAS subtraction assignment kernel, double-precision overload.
// Enabled only when UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> is true.
4721 template<
typename VT1
4725 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
4726 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
// Triangular matrices: trmv operates on 'tmp' instead of y, with the triangle
// flag derived from IsLower<MT1>.
4728 if( IsTriangular<MT1>::value ) {
4730 dtrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
// gemv call with the negated scalar and 1.0 as scaling arguments
// (presumably y = y - scalar*x*A; confirm against the dgemv wrapper).
4734 dgemv( y, x, A, -scalar, 1.0 );
4755 template<
typename VT1
4759 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4760 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4762 if( IsTriangular<MT1>::value ) {
4764 ctrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4768 cgemv( y, x, A, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4789 template<
typename VT1
4793 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
4794 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
4796 if( IsTriangular<MT1>::value ) {
4798 ztrmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4802 zgemv( y, x, A, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4824 template<
typename VT1 >
4825 friend inline void multAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4835 const ResultType tmp(
serial( rhs ) );
4858 template<
typename VT1 >
4859 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4860 smpAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4866 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4867 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4869 if( right.rows() == 0UL ) {
4873 else if( right.columns() == 0UL ) {
4903 template<
typename VT1 >
4904 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4905 smpAssign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4915 const ResultType tmp( rhs );
4934 template<
typename VT1 >
4935 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4936 smpAddAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4942 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4943 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4945 if( right.rows() == 0UL || right.columns() == 0UL ) {
4979 template<
typename VT1 >
4980 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4981 smpSubAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
4987 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4988 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4990 if( right.rows() == 0UL || right.columns() == 0UL ) {
5025 template<
typename VT1 >
5026 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
5027 smpMultAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
5037 const ResultType tmp( rhs );
5100 template<
typename T1
5102 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
5107 if( (~vec).
size() != (~mat).
rows() )
5108 throw std::invalid_argument(
"Vector and matrix sizes do not match" );
5136 template<
typename T1
5139 inline const typename EnableIf< IsMatMatMultExpr<T2>,
typename MultExprTrait<T1,T2>::Type >::Type
5161 template<
typename MT,
typename VT >
5163 :
public Columns<MT>
5179 template<
typename VT,
typename MT,
bool AF >
5184 typedef typename MultExprTrait< typename SubvectorExprTrait<const VT,AF>::Type
5185 ,
typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:120
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:278
Header file for mathematical functions.
BLAZE_ALWAYS_INLINE void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:879
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B \cdot C$ ).
Definition: DMatDMatMultExpr.h:8247
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:377
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:123
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:316
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:441
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:291
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:367
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:279
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:276
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:274
Constraint on the data type.
Header file for the MultExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:431
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:387
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:125
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:263
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:327
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:121
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:313
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:288
Header file for the IsMatMatMultExpr type trait class.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:285
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold. This threshold specifies when a dens...
Definition: Thresholds.h:368
Header file for the IsBlasCompatible type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:275
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:411
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:277
Constraints on the storage order of matrix types.
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:442
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:421
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:273
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
Header file for BLAS level 2 functions.
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:124
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Constraint on the data type.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
Constraint on the data type.
Expression object for transpose dense vector-dense matrix multiplications. The TDVecDMatMultExpr class...
Definition: Forward.h:133
Header file for the HasMutableDataAccess type trait.
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:122
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:399
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the IsUpper type trait.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:282
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849