35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
102 template<
typename VT
104 class TDVecDMatMultExpr :
public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
105 ,
private TVecMatMultExpr
106 ,
private Computation
// Compile-time switch consumed through EnableIf by friend function templates further
// down in this class. The template argument T1 does not influence the result: 'value'
// is non-zero whenever evaluateVector or evaluateMatrix is set.
135 template<
typename T1 >
136 struct UseSMPAssign {
137 enum { value = ( evaluateVector || evaluateMatrix ) };
// Helper trait for kernel selection: 'value' is non-zero only if all three types are
// flagged vectorizable and IsFloat holds for each of their element types.
// It is used as the EnableIf condition of the cblas_sgemv based kernels.
148 template<
typename T1,
typename T2,
typename T3 >
149 struct UseSinglePrecisionKernel {
150 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
151 IsFloat<typename T1::ElementType>::value &&
152 IsFloat<typename T2::ElementType>::value &&
153 IsFloat<typename T3::ElementType>::value };
// Helper trait for kernel selection: 'value' is non-zero only if all three types are
// flagged vectorizable and IsDouble holds for each of their element types.
// It is used as the EnableIf condition of the cblas_dgemv based kernels.
164 template<
typename T1,
typename T2,
typename T3 >
165 struct UseDoublePrecisionKernel {
166 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
167 IsDouble<typename T1::ElementType>::value &&
168 IsDouble<typename T2::ElementType>::value &&
169 IsDouble<typename T3::ElementType>::value };
// Helper trait for kernel selection: 'value' is non-zero only if all three types are
// flagged vectorizable and each element type is exactly complex<float>.
// It is used as the EnableIf condition of the cblas_cgemv based kernels.
180 template<
typename T1,
typename T2,
typename T3 >
181 struct UseSinglePrecisionComplexKernel {
182 typedef complex<float> Type;
183 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184 IsSame<typename T1::ElementType,Type>::value &&
185 IsSame<typename T2::ElementType,Type>::value &&
186 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for kernel selection: 'value' is non-zero only if all three types are
// flagged vectorizable and each element type is exactly complex<double>.
// It is used as the EnableIf condition of the cblas_zgemv based kernels.
197 template<
typename T1,
typename T2,
typename T3 >
198 struct UseDoublePrecisionComplexKernel {
199 typedef complex<double> Type;
200 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201 IsSame<typename T1::ElementType,Type>::value &&
202 IsSame<typename T2::ElementType,Type>::value &&
203 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for kernel selection: 'value' is non-zero if BLAZE_BLAS_MODE is disabled
// (evaluates to zero) or if none of the four BLAS kernel traits (single, double, complex
// single, complex double) applies to the given type combination. This makes it the
// complement of the four BLAS conditions, so exactly one selectBlas*Kernel overload
// is viable for any type triple.
213 template<
typename T1,
typename T2,
typename T3 >
214 struct UseDefaultKernel {
215 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
216 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
217 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
218 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Helper trait that chooses between the scalar and the intrinsic-based default kernels:
// 'value' is non-zero only if all three types are flagged vectorizable, share one and the
// same element type, and IntrinsicTrait reports both addition and multiplication as
// available for that element type.
229 template<
typename T1,
typename T2,
typename T3 >
230 struct UseVectorizedDefaultKernel {
231 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234 IntrinsicTrait<typename T1::ElementType>::addition &&
235 IntrinsicTrait<typename T1::ElementType>::multiplication };
265 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
271 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
272 !evaluateMatrix && MT::smpAssignable };
301 if(
mat_.rows() != 0UL ) {
303 for(
size_t j=1UL; j<
end_; j+=2UL ) {
306 if( end_ < mat_.rows() ) {
324 return mat_.columns();
354 template<
typename T >
356 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
366 template<
typename T >
368 return (
vec_.isAliased( alias ) ||
mat_.isAliased( alias ) );
378 return vec_.isAligned() &&
mat_.isAligned();
415 template<
typename VT1 >
422 if( rhs.mat_.rows() == 0UL ) {
426 else if( rhs.mat_.columns() == 0UL ) {
438 TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
// Kernel dispatcher for the plain assignment y = x * A.
// Forwards either to selectDefaultAssignKernel() or to selectBlasAssignKernel().
// NOTE(review): the condition that picks between the two calls is not visible in this
// excerpt; confirm against the complete source.
454 template<
typename VT1
457 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
461 TDVecDMatMultExpr::selectDefaultAssignKernel( y, x, A );
463 TDVecDMatMultExpr::selectBlasAssignKernel( y, x, A );
// Scalar (non-vectorized) default kernel for y = x * A; enabled when
// UseVectorizedDefaultKernel does not hold for the type triple.
//   y  target vector, indexed via operator[]
//   x  left-hand side vector operand
//   A  right-hand side matrix operand, indexed via operator()(row,column)
// Row 0 initializes y, all further rows are accumulated with the column loop
// unrolled by two.
// NOTE(review): x[0UL] and A(0UL,j) are read unconditionally here, so the caller is
// presumably expected to filter out empty matrices first; confirm.
482 template<
typename VT1
485 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
486 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
488 const size_t M( A.rows() );
489 const size_t N( A.columns() );
// N with the lowest bit cleared: the largest even number not exceeding N.
492 const size_t jend( N &
size_t(-2) );
// First row: plain assignment, so y does not have to be zeroed beforehand.
494 for(
size_t j=0UL; j<N; ++j ) {
495 y[j] = x[0UL] * A(0UL,j);
// Remaining rows: accumulate two columns per iteration.
497 for(
size_t i=1UL; i<M; ++i ) {
498 for(
size_t j=0UL; j<jend; j+=2UL ) {
499 y[j ] += x[i] * A(i,j );
500 y[j+1UL] += x[i] * A(i,j+1UL);
// Left-over column for odd N (the guarding condition is not visible in this excerpt).
503 y[jend] += x[i] * A(i,jend);
// Intrinsic-based default kernel for y = x * A; enabled when UseVectorizedDefaultKernel
// holds for the type triple.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s)
// of IT::size elements each. For every block the products x1 * A.load(i,...) are summed
// over all rows i in the xmm accumulators, which are then written to y via store().
// NOTE(review): the declaration of j, the accumulator declarations of the smaller
// blocks, the assignment of x1, the store of xmm1 and the header of the final
// single-vector loop are not visible in this excerpt.
524 template<
typename VT1
527 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
528 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
530 typedef IntrinsicTrait<ElementType> IT;
532 const size_t M( A.rows() );
533 const size_t N( A.columns() );
// Blocks of eight intrinsic vectors.
537 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
538 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
539 for(
size_t i=0UL; i<M; ++i ) {
541 xmm1 = xmm1 + x1 * A.load(i,j );
542 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
543 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
544 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
545 xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
546 xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
547 xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
548 xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
551 y.store( j+IT::size , xmm2 );
552 y.store( j+IT::size*2UL, xmm3 );
553 y.store( j+IT::size*3UL, xmm4 );
554 y.store( j+IT::size*4UL, xmm5 );
555 y.store( j+IT::size*5UL, xmm6 );
556 y.store( j+IT::size*6UL, xmm7 );
557 y.store( j+IT::size*7UL, xmm8 );
// Blocks of four intrinsic vectors.
559 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
561 for(
size_t i=0UL; i<M; ++i ) {
563 xmm1 = xmm1 + x1 * A.load(i,j );
564 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
565 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
566 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
569 y.store( j+IT::size , xmm2 );
570 y.store( j+IT::size*2UL, xmm3 );
571 y.store( j+IT::size*3UL, xmm4 );
// Blocks of three intrinsic vectors.
573 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
575 for(
size_t i=0UL; i<M; ++i ) {
577 xmm1 = xmm1 + x1 * A.load(i,j );
578 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
579 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
582 y.store( j+IT::size , xmm2 );
583 y.store( j+IT::size*2UL, xmm3 );
// Blocks of two intrinsic vectors.
585 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
587 for(
size_t i=0UL; i<M; ++i ) {
589 xmm1 = xmm1 + x1 * A.load(i,j );
590 xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
593 y.store( j+IT::size, xmm2 );
// Final single intrinsic vector; x[i] is passed through set() before the multiplication.
597 for(
size_t i=0UL; i<M; ++i ) {
598 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// Fallback overload of the BLAS assignment kernel: enabled when UseDefaultKernel holds,
// i.e. BLAS mode is off or no BLAS kernel fits the element types. It simply delegates
// to selectDefaultAssignKernel().
620 template<
typename VT1
623 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
624 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
626 selectDefaultAssignKernel( y, x, A );
// BLAS assignment kernel for single precision operands (y = x * A); enabled when
// UseSinglePrecisionKernel holds.
// The size_t dimensions and the row spacing of A are converted to int with
// boost::numeric_cast (a range-checked conversion) because the CBLAS interface takes
// int arguments. The vector-matrix product is expressed as a transposed row-major gemv
// with alpha = 1 and beta = 0, so the previous content of y does not contribute.
646 template<
typename VT1
649 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
650 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
652 using boost::numeric_cast;
658 const int M ( numeric_cast<int>( A.rows() ) );
659 const int N ( numeric_cast<int>( A.columns() ) );
660 const int lda( numeric_cast<int>( A.spacing() ) );
662 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
663 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// BLAS assignment kernel for double precision operands (y = x * A); enabled when
// UseDoublePrecisionKernel holds.
// The size_t dimensions and the row spacing of A are converted to int with
// boost::numeric_cast (a range-checked conversion) because the CBLAS interface takes
// int arguments. The vector-matrix product is expressed as a transposed row-major gemv
// with alpha = 1 and beta = 0, so the previous content of y does not contribute.
684 template<
typename VT1
687 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
688 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
690 using boost::numeric_cast;
696 const int M ( numeric_cast<int>( A.rows() ) );
697 const int N ( numeric_cast<int>( A.columns() ) );
698 const int lda( numeric_cast<int>( A.spacing() ) );
700 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
701 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// BLAS assignment kernel for complex<float> operands (y = x * A); enabled when
// UseSinglePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address, hence the local alpha = (1,0)
// and beta = (0,0) objects; with beta = 0 the previous content of y does not contribute.
722 template<
typename VT1
725 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
726 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
728 using boost::numeric_cast;
737 const int M ( numeric_cast<int>( A.rows() ) );
738 const int N ( numeric_cast<int>( A.columns() ) );
739 const int lda( numeric_cast<int>( A.spacing() ) );
740 const complex<float> alpha( 1.0F, 0.0F );
741 const complex<float> beta ( 0.0F, 0.0F );
743 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
744 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS assignment kernel for complex<double> operands (y = x * A); enabled when
// UseDoublePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address, hence the local alpha = (1,0)
// and beta = (0,0) objects; with beta = 0 the previous content of y does not contribute.
765 template<
typename VT1
768 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
769 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
771 using boost::numeric_cast;
780 const int M ( numeric_cast<int>( A.rows() ) );
781 const int N ( numeric_cast<int>( A.columns() ) );
782 const int lda( numeric_cast<int>( A.spacing() ) );
783 const complex<double> alpha( 1.0, 0.0 );
784 const complex<double> beta ( 0.0, 0.0 );
786 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
787 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
806 template<
typename VT1 >
836 template<
typename VT1 >
843 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
855 TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
// Kernel dispatcher for the addition assignment y += x * A.
// The default kernel is called when the matrix operand type MT is a computation that is
// not evaluated up front, or when a second condition holds that is not visible in this
// excerpt. The BLAS kernel call that follows is presumably the else branch; confirm
// against the complete source.
871 template<
typename VT1
874 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
876 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
878 TDVecDMatMultExpr::selectDefaultAddAssignKernel( y, x, A );
880 TDVecDMatMultExpr::selectBlasAddAssignKernel( y, x, A );
// Scalar (non-vectorized) default kernel for y += x * A; enabled when
// UseVectorizedDefaultKernel does not hold for the type triple.
// Every row i of A contributes x[i] * A(i,j) to y[j]; the column loop is unrolled by two.
// In contrast to the plain assignment kernel the row loop starts at 0, because y already
// holds the values to be added to.
899 template<
typename VT1
902 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
903 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
905 const size_t M( A.rows() );
906 const size_t N( A.columns() );
// N with the lowest bit cleared: the largest even number not exceeding N.
909 const size_t jend( N &
size_t(-2) );
911 for(
size_t i=0UL; i<M; ++i ) {
912 for(
size_t j=0UL; j<jend; j+=2UL ) {
913 y[j ] += x[i] * A(i,j );
914 y[j+1UL] += x[i] * A(i,j+1UL);
// Left-over column for odd N (the guarding condition is not visible in this excerpt).
917 y[jend] += x[i] * A(i,jend);
// Intrinsic-based default kernel for y += x * A; enabled when UseVectorizedDefaultKernel
// holds for the type triple.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s)
// of IT::size elements each. For every block the products x1 * A.load(i,...) are added
// to the xmm accumulators over all rows i, and the accumulators are written back to y.
// NOTE(review): the accumulator declarations are not visible in this excerpt; since the
// results are stored without re-reading y, the accumulators are presumably preloaded
// from y. The declaration of j, the assignment of x1, some stores of xmm1 and the header
// of the final single-vector loop are not visible either.
938 template<
typename VT1
941 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
942 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
944 typedef IntrinsicTrait<ElementType> IT;
946 const size_t M( A.rows() );
947 const size_t N( A.columns() );
// Blocks of eight intrinsic vectors.
951 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
960 for(
size_t i=0UL; i<M; ++i ) {
962 xmm1 = xmm1 + x1 * A.load(i,j );
963 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
964 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
965 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
966 xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
967 xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
968 xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
969 xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
972 y.store( j+IT::size , xmm2 );
973 y.store( j+IT::size*2UL, xmm3 );
974 y.store( j+IT::size*3UL, xmm4 );
975 y.store( j+IT::size*4UL, xmm5 );
976 y.store( j+IT::size*5UL, xmm6 );
977 y.store( j+IT::size*6UL, xmm7 );
978 y.store( j+IT::size*7UL, xmm8 );
// Blocks of four intrinsic vectors.
980 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
985 for(
size_t i=0UL; i<M; ++i ) {
987 xmm1 = xmm1 + x1 * A.load(i,j );
988 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
989 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
990 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
993 y.store( j+IT::size , xmm2 );
994 y.store( j+IT::size*2UL, xmm3 );
995 y.store( j+IT::size*3UL, xmm4 );
// Blocks of three intrinsic vectors.
997 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1001 for(
size_t i=0UL; i<M; ++i ) {
1003 xmm1 = xmm1 + x1 * A.load(i,j );
1004 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1005 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1007 y.store( j , xmm1 );
1008 y.store( j+IT::size , xmm2 );
1009 y.store( j+IT::size*2UL, xmm3 );
// Blocks of two intrinsic vectors.
1011 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1014 for(
size_t i=0UL; i<M; ++i ) {
1016 xmm1 = xmm1 + x1 * A.load(i,j );
1017 xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1019 y.store( j , xmm1 );
1020 y.store( j+IT::size, xmm2 );
// Final single intrinsic vector; x[i] is passed through set() before the multiplication.
1024 for(
size_t i=0UL; i<M; ++i ) {
1025 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
// Fallback overload of the BLAS addition assignment kernel: enabled when
// UseDefaultKernel holds, i.e. BLAS mode is off or no BLAS kernel fits the element
// types. It simply delegates to selectDefaultAddAssignKernel().
1047 template<
typename VT1
1050 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1051 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1053 selectDefaultAddAssignKernel( y, x, A );
// BLAS addition assignment kernel for single precision operands (y += x * A); enabled
// when UseSinglePrecisionKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The gemv
// call uses alpha = 1 and beta = 1, so the product is added to the existing content of y.
1073 template<
typename VT1
1076 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1077 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1079 using boost::numeric_cast;
1085 const int M ( numeric_cast<int>( A.rows() ) );
1086 const int N ( numeric_cast<int>( A.columns() ) );
1087 const int lda( numeric_cast<int>( A.spacing() ) );
1089 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, 1.0F,
1090 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS addition assignment kernel for double precision operands (y += x * A); enabled
// when UseDoublePrecisionKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The gemv
// call uses alpha = 1 and beta = 1, so the product is added to the existing content of y.
1111 template<
typename VT1
1114 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1115 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1117 using boost::numeric_cast;
1123 const int M ( numeric_cast<int>( A.rows() ) );
1124 const int N ( numeric_cast<int>( A.columns() ) );
1125 const int lda( numeric_cast<int>( A.spacing() ) );
1127 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, 1.0,
1128 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS addition assignment kernel for complex<float> operands (y += x * A); enabled
// when UseSinglePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address; alpha = (1,0) and beta = (1,0)
// make the call add the product to the existing content of y.
1149 template<
typename VT1
1152 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1153 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1155 using boost::numeric_cast;
1164 const int M ( numeric_cast<int>( A.rows() ) );
1165 const int N ( numeric_cast<int>( A.columns() ) );
1166 const int lda( numeric_cast<int>( A.spacing() ) );
1167 const complex<float> alpha( 1.0F, 0.0F );
1168 const complex<float> beta ( 1.0F, 0.0F );
1170 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1171 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS addition assignment kernel for complex<double> operands (y += x * A); enabled
// when UseDoublePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address; alpha = (1,0) and beta = (1,0)
// make the call add the product to the existing content of y.
1192 template<
typename VT1
1195 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1196 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1198 using boost::numeric_cast;
1207 const int M ( numeric_cast<int>( A.rows() ) );
1208 const int N ( numeric_cast<int>( A.columns() ) );
1209 const int lda( numeric_cast<int>( A.spacing() ) );
1210 const complex<double> alpha( 1.0, 0.0 );
1211 const complex<double> beta ( 1.0, 0.0 );
1213 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1214 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1237 template<
typename VT1 >
1244 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1256 TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
// Kernel dispatcher for the subtraction assignment y -= x * A.
// The default kernel is called when the matrix operand type MT is a computation that is
// not evaluated up front, or when a second condition holds that is not visible in this
// excerpt. The BLAS kernel call that follows is presumably the else branch; confirm
// against the complete source.
1272 template<
typename VT1
1275 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1277 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
1279 TDVecDMatMultExpr::selectDefaultSubAssignKernel( y, x, A );
1281 TDVecDMatMultExpr::selectBlasSubAssignKernel( y, x, A );
// Scalar (non-vectorized) default kernel for y -= x * A; enabled when
// UseVectorizedDefaultKernel does not hold for the type triple.
// Every row i of A removes x[i] * A(i,j) from y[j]; the column loop is unrolled by two.
1300 template<
typename VT1
1303 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1304 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1306 const size_t M( A.rows() );
1307 const size_t N( A.columns() );
// N with the lowest bit cleared: the largest even number not exceeding N.
1310 const size_t jend( N &
size_t(-2) );
1312 for(
size_t i=0UL; i<M; ++i ) {
1313 for(
size_t j=0UL; j<jend; j+=2UL ) {
1314 y[j ] -= x[i] * A(i,j );
1315 y[j+1UL] -= x[i] * A(i,j+1UL);
// Left-over column for odd N (the guarding condition is not visible in this excerpt).
1318 y[jend] -= x[i] * A(i,jend);
// Intrinsic-based default kernel for y -= x * A; enabled when UseVectorizedDefaultKernel
// holds for the type triple.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s)
// of IT::size elements each. For every block the products x1 * A.load(i,...) are
// subtracted from the xmm accumulators over all rows i, and the accumulators are
// written back to y.
// NOTE(review): the accumulator declarations are not visible in this excerpt; since the
// results are stored without re-reading y, the accumulators are presumably preloaded
// from y. The declaration of j, the assignment of x1 and the header of the final
// single-vector loop are not visible either.
1339 template<
typename VT1
1342 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1343 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1345 typedef IntrinsicTrait<ElementType> IT;
1347 const size_t M( A.rows() );
1348 const size_t N( A.columns() );
// Blocks of eight intrinsic vectors.
1352 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1361 for(
size_t i=0UL; i<M; ++i ) {
1363 xmm1 = xmm1 - x1 * A.load(i,j );
1364 xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1365 xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1366 xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1367 xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1368 xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1369 xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1370 xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1372 y.store( j , xmm1 );
1373 y.store( j+IT::size , xmm2 );
1374 y.store( j+IT::size*2UL, xmm3 );
1375 y.store( j+IT::size*3UL, xmm4 );
1376 y.store( j+IT::size*4UL, xmm5 );
1377 y.store( j+IT::size*5UL, xmm6 );
1378 y.store( j+IT::size*6UL, xmm7 );
1379 y.store( j+IT::size*7UL, xmm8 );
// Blocks of four intrinsic vectors.
1381 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1386 for(
size_t i=0UL; i<M; ++i ) {
1388 xmm1 = xmm1 - x1 * A.load(i,j );
1389 xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1390 xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1391 xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1393 y.store( j , xmm1 );
1394 y.store( j+IT::size , xmm2 );
1395 y.store( j+IT::size*2UL, xmm3 );
1396 y.store( j+IT::size*3UL, xmm4 );
// Blocks of three intrinsic vectors.
1398 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
1402 for(
size_t i=0UL; i<M; ++i ) {
1404 xmm1 = xmm1 - x1 * A.load(i,j );
1405 xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1406 xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1408 y.store( j , xmm1 );
1409 y.store( j+IT::size , xmm2 );
1410 y.store( j+IT::size*2UL, xmm3 );
// Blocks of two intrinsic vectors.
1412 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1415 for(
size_t i=0UL; i<M; ++i ) {
1417 xmm1 = xmm1 - x1 * A.load(i,j );
1418 xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
1420 y.store( j , xmm1 );
1421 y.store( j+IT::size, xmm2 );
// Final single intrinsic vector; x[i] is passed through set() before the multiplication.
1425 for(
size_t i=0UL; i<M; ++i ) {
1426 xmm1 = xmm1 -
set( x[i] ) * A.load(i,j);
// Fallback overload of the BLAS subtraction assignment kernel: enabled when
// UseDefaultKernel holds, i.e. BLAS mode is off or no BLAS kernel fits the element
// types. It simply delegates to selectDefaultSubAssignKernel().
1448 template<
typename VT1
1451 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1> >::Type
1452 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1454 selectDefaultSubAssignKernel( y, x, A );
// BLAS subtraction assignment kernel for single precision operands (y -= x * A);
// enabled when UseSinglePrecisionKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The gemv
// call uses alpha = -1 and beta = 1, so the product is subtracted from the existing
// content of y.
1474 template<
typename VT1
1477 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1> >::Type
1478 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1480 using boost::numeric_cast;
1486 const int M ( numeric_cast<int>( A.rows() ) );
1487 const int N ( numeric_cast<int>( A.columns() ) );
1488 const int lda( numeric_cast<int>( A.spacing() ) );
1490 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -1.0F,
1491 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
// BLAS subtraction assignment kernel for double precision operands (y -= x * A);
// enabled when UseDoublePrecisionKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The gemv
// call uses alpha = -1 and beta = 1, so the product is subtracted from the existing
// content of y.
1512 template<
typename VT1
1515 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1> >::Type
1516 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1518 using boost::numeric_cast;
1524 const int M ( numeric_cast<int>( A.rows() ) );
1525 const int N ( numeric_cast<int>( A.columns() ) );
1526 const int lda( numeric_cast<int>( A.spacing() ) );
1528 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -1.0,
1529 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
// BLAS subtraction assignment kernel for complex<float> operands (y -= x * A); enabled
// when UseSinglePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address; alpha = (-1,0) and beta = (1,0)
// make the call subtract the product from the existing content of y.
1550 template<
typename VT1
1553 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1554 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1556 using boost::numeric_cast;
1565 const int M ( numeric_cast<int>( A.rows() ) );
1566 const int N ( numeric_cast<int>( A.columns() ) );
1567 const int lda( numeric_cast<int>( A.spacing() ) );
1568 const complex<float> alpha( -1.0F, 0.0F );
1569 const complex<float> beta ( 1.0F, 0.0F );
1571 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1572 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// BLAS subtraction assignment kernel for complex<double> operands (y -= x * A); enabled
// when UseDoublePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The complex
// gemv routine receives its scaling factors by address; alpha = (-1,0) and beta = (1,0)
// make the call subtract the product from the existing content of y.
1593 template<
typename VT1
1596 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
1597 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
1599 using boost::numeric_cast;
1608 const int M ( numeric_cast<int>( A.rows() ) );
1609 const int N ( numeric_cast<int>( A.columns() ) );
1610 const int lda( numeric_cast<int>( A.spacing() ) );
1611 const complex<double> alpha( -1.0, 0.0 );
1612 const complex<double> beta ( 1.0, 0.0 );
1614 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
1615 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
1638 template<
typename VT1 >
1674 template<
typename VT1 >
1675 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1682 if( rhs.mat_.rows() == 0UL ) {
1686 else if( rhs.mat_.columns() == 0UL ) {
1718 template<
typename VT1 >
1719 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1751 template<
typename VT1 >
1752 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1759 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1795 template<
typename VT1 >
1796 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1803 if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1839 template<
typename VT1 >
1840 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
1889 template<
typename VT
1893 :
public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
1894 ,
private VecScalarMultExpr
1895 ,
private Computation
1899 typedef TDVecDMatMultExpr<VT,MT> VMM;
1911 enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
1916 enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
1917 IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
// Compile-time switch of the scaled expression. The template argument T1 does not
// influence the result: 'value' is non-zero whenever evaluateVector or evaluateMatrix
// is set.
// NOTE(review): presumably consumed through EnableIf by the SMP assignment functions of
// this class, which are not visible in this excerpt.
1925 template<
typename T1 >
1926 struct UseSMPAssign {
1927 enum { value = ( evaluateVector || evaluateMatrix ) };
// Helper trait for kernel selection in the scaled expression: 'value' is non-zero only
// if T1, T2 and T3 are flagged vectorizable, IsFloat holds for each of their element
// types, and T4 is not a complex type. The kernels instantiate T4 with the type of the
// scalar factor, so a complex scalar rules out the real-valued sgemv kernel.
1936 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1937 struct UseSinglePrecisionKernel {
1938 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1939 IsFloat<typename T1::ElementType>::value &&
1940 IsFloat<typename T2::ElementType>::value &&
1941 IsFloat<typename T3::ElementType>::value &&
1942 !IsComplex<T4>::value };
// Helper trait for kernel selection in the scaled expression: 'value' is non-zero only
// if T1, T2 and T3 are flagged vectorizable, IsDouble holds for each of their element
// types, and T4 is not a complex type. The kernels instantiate T4 with the type of the
// scalar factor, so a complex scalar rules out the real-valued dgemv kernel.
1951 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1952 struct UseDoublePrecisionKernel {
1953 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1954 IsDouble<typename T1::ElementType>::value &&
1955 IsDouble<typename T2::ElementType>::value &&
1956 IsDouble<typename T3::ElementType>::value &&
1957 !IsComplex<T4>::value };
// Helper trait for kernel selection in the scaled expression: 'value' is non-zero only
// if all three types are flagged vectorizable and each element type is exactly
// complex<float>. Unlike the real-valued traits it takes no scalar type parameter.
1966 template<
typename T1,
typename T2,
typename T3 >
1967 struct UseSinglePrecisionComplexKernel {
1968 typedef complex<float> Type;
1969 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1970 IsSame<typename T1::ElementType,Type>::value &&
1971 IsSame<typename T2::ElementType,Type>::value &&
1972 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for kernel selection in the scaled expression: 'value' is non-zero only
// if all three types are flagged vectorizable and each element type is exactly
// complex<double>. Unlike the real-valued traits it takes no scalar type parameter.
1981 template<
typename T1,
typename T2,
typename T3 >
1982 struct UseDoublePrecisionComplexKernel {
1983 typedef complex<double> Type;
1984 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1985 IsSame<typename T1::ElementType,Type>::value &&
1986 IsSame<typename T2::ElementType,Type>::value &&
1987 IsSame<typename T3::ElementType,Type>::value };
// Helper trait for kernel selection in the scaled expression: 'value' is non-zero if
// BLAZE_BLAS_MODE is disabled (evaluates to zero) or if none of the four BLAS kernel
// traits applies. T4 is forwarded only to the two real-valued traits; the complex
// traits are evaluated on T1, T2 and T3 alone.
1995 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1996 struct UseDefaultKernel {
1997 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1998 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1999 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2000 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Helper trait that chooses between the scalar and the intrinsic-based default kernels
// of the scaled expression: 'value' is non-zero only if T1, T2 and T3 are flagged
// vectorizable, share one and the same element type, T4 is identical to that element
// type as well, and IntrinsicTrait reports both addition and multiplication as available.
2009 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2010 struct UseVectorizedDefaultKernel {
2011 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2012 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2013 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2014 IsSame<typename T1::ElementType,T4>::value &&
2015 IntrinsicTrait<typename T1::ElementType>::addition &&
2016 IntrinsicTrait<typename T1::ElementType>::multiplication };
2022 typedef DVecScalarMultExpr<VMM,ST,true>
This;
2023 typedef typename MultTrait<RES,ST>::Type
ResultType;
2026 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2031 typedef const TDVecDMatMultExpr<VT,MT>
LeftOperand;
2037 typedef typename SelectType< evaluateVector, const VRT, VCT >::Type
LT;
2040 typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type
RT;
2045 enum { vectorizable = VT::vectorizable && MT::vectorizable &&
2046 IsSame<VET,MET>::value &&
2047 IsSame<VET,ST>::value &&
2048 IntrinsicTrait<VET>::addition &&
2049 IntrinsicTrait<VET>::multiplication };
2052 enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2053 !evaluateMatrix && MT::smpAssignable };
2062 explicit inline DVecScalarMultExpr(
const VMM& vector, ST scalar )
2076 return vector_[index] * scalar_;
// Returns the number of elements of the scaled expression, which is the size reported
// by the wrapped vector-matrix multiplication expression.
2085 inline size_t size()
const {
2086 return vector_.size();
// Aliasing query: forwards the given address to canAlias() of the wrapped
// vector-matrix multiplication expression and returns its answer unchanged.
2116 template<
typename T >
2117 inline bool canAlias(
const T* alias )
const {
2118 return vector_.canAlias( alias );
// Aliasing query: forwards the given address to isAliased() of the wrapped
// vector-matrix multiplication expression and returns its answer unchanged.
2128 template<
typename T >
2129 inline bool isAliased(
const T* alias )
const {
2130 return vector_.isAliased( alias );
2140 return vector_.isAligned();
2150 typename VMM::RightOperand A( vector_.rightOperand() );
2152 ( IsComputation<MT>::value && !evaluateMatrix ) ||
// Assignment of a scaled vector-matrix multiplication to a dense vector with the same
// 'true' transpose flag as the expression.
// The operands of the wrapped multiplication are fetched first. A matrix without rows
// and a matrix without columns are treated as separate special cases before the kernel
// dispatcher is invoked with the scalar factor.
// NOTE(review): the bodies of the two special cases and the definitions of x and A are
// not visible in this excerpt.
2176 template<
typename VT1 >
2177 friend inline void assign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2183 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2184 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2186 if( right.rows() == 0UL ) {
2190 else if( right.columns() == 0UL ) {
2202 DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel dispatcher for the scaled assignment y = x * A * scalar.
// The default kernel is called when the matrix operand type MT is a computation that is
// not evaluated up front, or when a second condition holds that is not visible in this
// excerpt. The BLAS kernel call that follows is presumably the else branch; confirm
// against the complete source.
2217 template<
typename VT1
2221 static inline void selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2223 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2225 DVecScalarMultExpr::selectDefaultAssignKernel( y, x, A, scalar );
2227 DVecScalarMultExpr::selectBlasAssignKernel( y, x, A, scalar );
// Scalar (non-vectorized) default kernel for y = x * A * scalar; enabled when
// UseVectorizedDefaultKernel does not hold for the type combination.
// Row 0 initializes y, all further rows are accumulated with the column loop unrolled
// by two, and a final pass runs over all elements of y.
// NOTE(review): x[0UL] and A(0UL,j) are read unconditionally here, so the caller is
// presumably expected to filter out empty matrices first; confirm.
2245 template<
typename VT1
2249 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2250 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2252 const size_t M( A.rows() );
2253 const size_t N( A.columns() );
// N with the lowest bit cleared: the largest even number not exceeding N.
2256 const size_t jend( N &
size_t(-2) );
// First row: plain assignment, so y does not have to be zeroed beforehand.
2258 for(
size_t j=0UL; j<N; ++j ) {
2259 y[j] = x[0UL] * A(0UL,j);
// Remaining rows: accumulate two columns per iteration.
2261 for(
size_t i=1UL; i<M; ++i ) {
2262 for(
size_t j=0UL; j<jend; j+=2UL ) {
2263 y[j ] += x[i] * A(i,j );
2264 y[j+1UL] += x[i] * A(i,j+1UL);
// Left-over column for odd N (the guarding condition is not visible in this excerpt).
2267 y[jend] += x[i] * A(i,jend);
// Final pass over y; the loop body is not visible here, presumably it applies 'scalar'.
2270 for(
size_t j=0UL; j<N; ++j ) {
// Intrinsic-based default kernel for y = x * A * scalar; enabled when
// UseVectorizedDefaultKernel holds for the type combination.
// The columns are processed in blocks of 8, 4, 3, 2 and finally 1 intrinsic vector(s)
// of IT::size elements each. For every block the products x1 * A.load(i,...) are summed
// over all rows i in the xmm accumulators; each accumulator is multiplied by 'factor'
// once, at store time, instead of scaling inside the row loop.
// NOTE(review): the definitions of j, x1 and factor (presumably the scalar in intrinsic
// form), the accumulator declarations of the smaller blocks and the header of the final
// single-vector loop are not visible in this excerpt.
2290 template<
typename VT1
2294 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2295 selectDefaultAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2297 typedef IntrinsicTrait<ElementType> IT;
2299 const size_t M( A.rows() );
2300 const size_t N( A.columns() );
// Blocks of eight intrinsic vectors.
2306 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2307 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2308 for(
size_t i=0UL; i<M; ++i ) {
2310 xmm1 = xmm1 + x1 * A.load(i,j );
2311 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2312 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2313 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2314 xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2315 xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2316 xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2317 xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2319 y.store( j , xmm1*factor );
2320 y.store( j+IT::size , xmm2*factor );
2321 y.store( j+IT::size*2UL, xmm3*factor );
2322 y.store( j+IT::size*3UL, xmm4*factor );
2323 y.store( j+IT::size*4UL, xmm5*factor );
2324 y.store( j+IT::size*5UL, xmm6*factor );
2325 y.store( j+IT::size*6UL, xmm7*factor );
2326 y.store( j+IT::size*7UL, xmm8*factor );
// Blocks of four intrinsic vectors.
2328 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2330 for(
size_t i=0UL; i<M; ++i ) {
2332 xmm1 = xmm1 + x1 * A.load(i,j );
2333 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2334 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2335 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2337 y.store( j , xmm1*factor );
2338 y.store( j+IT::size , xmm2*factor );
2339 y.store( j+IT::size*2UL, xmm3*factor );
2340 y.store( j+IT::size*3UL, xmm4*factor );
// Blocks of three intrinsic vectors.
2342 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2344 for(
size_t i=0UL; i<M; ++i ) {
2346 xmm1 = xmm1 + x1 * A.load(i,j );
2347 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2348 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2350 y.store( j , xmm1*factor );
2351 y.store( j+IT::size , xmm2*factor );
2352 y.store( j+IT::size*2UL, xmm3*factor );
// Blocks of two intrinsic vectors.
2354 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2356 for(
size_t i=0UL; i<M; ++i ) {
2358 xmm1 = xmm1 + x1 * A.load(i,j );
2359 xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2361 y.store( j , xmm1*factor );
2362 y.store( j+IT::size, xmm2*factor );
// Final single intrinsic vector; x[i] is passed through set() before the multiplication.
2366 for(
size_t i=0UL; i<M; ++i ) {
2367 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2369 y.store( j, xmm1*factor );
// Fallback overload of the scaled BLAS assignment kernel: enabled when UseDefaultKernel
// holds, i.e. BLAS mode is off or no BLAS kernel fits the types. It simply delegates to
// selectDefaultAssignKernel(), passing the scalar through.
2387 template<
typename VT1
2391 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2392 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2394 selectDefaultAssignKernel( y, x, A, scalar );
// Scaled BLAS assignment kernel for single precision operands (y = x * A * scalar);
// enabled when UseSinglePrecisionKernel holds for the operand types and the scalar type.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The scalar
// is handed to gemv as alpha, so the scaling is folded into the BLAS call; beta = 0
// means the previous content of y does not contribute.
2413 template<
typename VT1
2417 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2418 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2420 using boost::numeric_cast;
2426 const int M ( numeric_cast<int>( A.rows() ) );
2427 const int N ( numeric_cast<int>( A.columns() ) );
2428 const int lda( numeric_cast<int>( A.spacing() ) );
2430 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2431 A.data(), lda, x.data(), 1, 0.0F, y.data(), 1 );
// Scaled BLAS assignment kernel for double precision operands (y = x * A * scalar);
// enabled when UseDoublePrecisionKernel holds for the operand types and the scalar type.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The scalar
// is handed to gemv as alpha, so the scaling is folded into the BLAS call; beta = 0
// means the previous content of y does not contribute.
2451 template<
typename VT1
2455 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2456 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2458 using boost::numeric_cast;
2464 const int M ( numeric_cast<int>( A.rows() ) );
2465 const int N ( numeric_cast<int>( A.columns() ) );
2466 const int lda( numeric_cast<int>( A.spacing() ) );
2468 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2469 A.data(), lda, x.data(), 1, 0.0, y.data(), 1 );
// Scaled BLAS assignment kernel for complex<float> operands (y = x * A * scalar);
// enabled when UseSinglePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The scalar
// is converted to a complex<float> alpha, which the complex gemv routine receives by
// address; beta = (0,0) means the previous content of y does not contribute.
2489 template<
typename VT1
2493 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2494 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2496 using boost::numeric_cast;
2505 const int M ( numeric_cast<int>( A.rows() ) );
2506 const int N ( numeric_cast<int>( A.columns() ) );
2507 const int lda( numeric_cast<int>( A.spacing() ) );
2508 const complex<float> alpha( scalar );
2509 const complex<float> beta ( 0.0F, 0.0F );
2511 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2512 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
// Scaled BLAS assignment kernel for complex<double> operands (y = x * A * scalar);
// enabled when UseDoublePrecisionComplexKernel holds.
// Dimensions and row spacing are converted to int with boost::numeric_cast. The scalar
// is converted to a complex<double> alpha, which the complex gemv routine receives by
// address; beta = (0,0) means the previous content of y does not contribute.
2532 template<
typename VT1
2536 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2537 selectBlasAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2539 using boost::numeric_cast;
2548 const int M ( numeric_cast<int>( A.rows() ) );
2549 const int N ( numeric_cast<int>( A.columns() ) );
2550 const int lda( numeric_cast<int>( A.spacing() ) );
2551 const complex<double> alpha( scalar );
2552 const complex<double> beta ( 0.0, 0.0 );
2554 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2555 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2572 template<
typename VT1 >
2573 friend inline void assign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
// Addition assignment of a scaled vector-matrix multiplication to a dense vector with
// the same 'true' transpose flag as the expression.
// The operands of the wrapped multiplication are fetched first. A matrix without rows or
// without columns is handled by a single special case before the kernel dispatcher is
// invoked with the scalar factor.
// NOTE(review): the body of the special case (presumably an early return, since an empty
// product adds nothing) and the definitions of x and A are not visible in this excerpt.
2600 template<
typename VT1 >
2601 friend inline void addAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2607 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2608 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2610 if( right.rows() == 0UL || right.columns() == 0UL ) {
2622 DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
// Kernel dispatcher for the scaled addition assignment y += x * A * scalar.
// The default kernel is called when the matrix operand type MT is a computation that is
// not evaluated up front, or when a second condition holds that is not visible in this
// excerpt. The BLAS kernel call that follows is presumably the else branch; confirm
// against the complete source.
2637 template<
typename VT1
2641 static inline void selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2643 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
2645 DVecScalarMultExpr::selectDefaultAddAssignKernel( y, x, A, scalar );
2647 DVecScalarMultExpr::selectBlasAddAssignKernel( y, x, A, scalar );
// Non-vectorized default kernel for y += x * A * scalar; enabled when
// UseVectorizedDefaultKernel does not hold for the type combination.
// No hand-written loop is used here: the scaled product is rebuilt as the expression
// x * A * scalar and passed to the addAssign() member of the target vector.
2665 template<
typename VT1
2669 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2670 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2672 y.addAssign( x * A * scalar );
2690 template<
typename VT1
2694 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2695 selectDefaultAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2697 typedef IntrinsicTrait<ElementType> IT;
2699 const size_t M( A.rows() );
2700 const size_t N( A.columns() );
2706 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2707 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2708 for(
size_t i=0UL; i<M; ++i ) {
2710 xmm1 = xmm1 + x1 * A.load(i,j );
2711 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2712 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2713 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2714 xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2715 xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2716 xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2717 xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2719 y.store( j , y.load(j ) + xmm1*factor );
2720 y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2721 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2722 y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2723 y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
2724 y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
2725 y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
2726 y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
2728 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2730 for(
size_t i=0UL; i<M; ++i ) {
2732 xmm1 = xmm1 + x1 * A.load(i,j );
2733 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2734 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2735 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2737 y.store( j , y.load(j ) + xmm1*factor );
2738 y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2739 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2740 y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
2742 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
2744 for(
size_t i=0UL; i<M; ++i ) {
2746 xmm1 = xmm1 + x1 * A.load(i,j );
2747 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2748 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2750 y.store( j , y.load(j ) + xmm1*factor );
2751 y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
2752 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
2754 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2756 for(
size_t i=0UL; i<M; ++i ) {
2758 xmm1 = xmm1 + x1 * A.load(i,j );
2759 xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2761 y.store( j , y.load(j ) + xmm1*factor );
2762 y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
2766 for(
size_t i=0UL; i<M; ++i ) {
2767 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
2769 y.store( j, y.load(j) + xmm1*factor );
2788 template<
typename VT1
2792 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2793 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2795 selectDefaultAddAssignKernel( y, x, A, scalar );
2814 template<
typename VT1
2818 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2819 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2821 using boost::numeric_cast;
2827 const int M ( numeric_cast<int>( A.rows() ) );
2828 const int N ( numeric_cast<int>( A.columns() ) );
2829 const int lda( numeric_cast<int>( A.spacing() ) );
2831 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2832 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
2852 template<
typename VT1
2856 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
2857 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2859 using boost::numeric_cast;
2865 const int M ( numeric_cast<int>( A.rows() ) );
2866 const int N ( numeric_cast<int>( A.columns() ) );
2867 const int lda( numeric_cast<int>( A.spacing() ) );
2869 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, scalar,
2870 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
2890 template<
typename VT1
2894 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2895 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2897 using boost::numeric_cast;
2906 const int M ( numeric_cast<int>( A.rows() ) );
2907 const int N ( numeric_cast<int>( A.columns() ) );
2908 const int lda( numeric_cast<int>( A.spacing() ) );
2909 const complex<float> alpha( scalar );
2910 const complex<float> beta ( 1.0F, 0.0F );
2912 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2913 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2933 template<
typename VT1
2937 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
2938 selectBlasAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
2940 using boost::numeric_cast;
2949 const int M ( numeric_cast<int>( A.rows() ) );
2950 const int N ( numeric_cast<int>( A.columns() ) );
2951 const int lda( numeric_cast<int>( A.spacing() ) );
2952 const complex<double> alpha( scalar );
2953 const complex<double> beta ( 1.0, 0.0 );
2955 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
2956 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
2977 template<
typename VT1 >
2978 friend inline void subAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
2984 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2985 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2987 if( right.rows() == 0UL || right.columns() == 0UL ) {
2999 DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
3014 template<
typename VT1
3018 static inline void selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3020 if( ( IsComputation<MT>::value && !evaluateMatrix ) ||
3022 DVecScalarMultExpr::selectDefaultSubAssignKernel( y, x, A, scalar );
3024 DVecScalarMultExpr::selectBlasSubAssignKernel( y, x, A, scalar );
3042 template<
typename VT1
3046 static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3047 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3049 y.subAssign( x * A * scalar );
3067 template<
typename VT1
3071 static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3072 selectDefaultSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3074 typedef IntrinsicTrait<ElementType> IT;
3076 const size_t M( A.rows() );
3077 const size_t N( A.columns() );
3083 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3084 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3085 for(
size_t i=0UL; i<M; ++i ) {
3087 xmm1 = xmm1 + x1 * A.load(i,j );
3088 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3089 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3090 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3091 xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3092 xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3093 xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3094 xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3096 y.store( j , y.load(j ) - xmm1*factor );
3097 y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3098 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3099 y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3100 y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
3101 y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
3102 y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
3103 y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
3105 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3107 for(
size_t i=0UL; i<M; ++i ) {
3109 xmm1 = xmm1 + x1 * A.load(i,j );
3110 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3111 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3112 xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3114 y.store( j , y.load(j ) - xmm1*factor );
3115 y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3116 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3117 y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3119 for( ; (j+IT::size*2UL) < N; j+=IT::size*3UL ) {
3121 for(
size_t i=0UL; i<M; ++i ) {
3123 xmm1 = xmm1 + x1 * A.load(i,j );
3124 xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3125 xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3127 y.store( j , y.load(j ) - xmm1*factor );
3128 y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3129 y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3131 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3133 for(
size_t i=0UL; i<M; ++i ) {
3135 xmm1 = xmm1 + x1 * A.load(i,j );
3136 xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3138 y.store( j , y.load(j ) - xmm1*factor );
3139 y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
3143 for(
size_t i=0UL; i<M; ++i ) {
3144 xmm1 = xmm1 +
set( x[i] ) * A.load(i,j);
3146 y.store( j, y.load(j) - xmm1*factor );
3165 template<
typename VT1
3169 static inline typename EnableIf< UseDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3170 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3172 selectDefaultSubAssignKernel( y, x, A, scalar );
3191 template<
typename VT1
3195 static inline typename EnableIf< UseSinglePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3196 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3198 using boost::numeric_cast;
3204 const int M ( numeric_cast<int>( A.rows() ) );
3205 const int N ( numeric_cast<int>( A.columns() ) );
3206 const int lda( numeric_cast<int>( A.spacing() ) );
3208 cblas_sgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3209 A.data(), lda, x.data(), 1, 1.0F, y.data(), 1 );
3229 template<
typename VT1
3233 static inline typename EnableIf< UseDoublePrecisionKernel<VT1,VT2,MT1,ST2> >::Type
3234 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3236 using boost::numeric_cast;
3242 const int M ( numeric_cast<int>( A.rows() ) );
3243 const int N ( numeric_cast<int>( A.columns() ) );
3244 const int lda( numeric_cast<int>( A.spacing() ) );
3246 cblas_dgemv( CblasRowMajor, CblasTrans, M, N, -scalar,
3247 A.data(), lda, x.data(), 1, 1.0, y.data(), 1 );
3267 template<
typename VT1
3271 static inline typename EnableIf< UseSinglePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3272 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3274 using boost::numeric_cast;
3283 const int M ( numeric_cast<int>( A.rows() ) );
3284 const int N ( numeric_cast<int>( A.columns() ) );
3285 const int lda( numeric_cast<int>( A.spacing() ) );
3286 const complex<float> alpha( -scalar );
3287 const complex<float> beta ( 1.0F, 0.0F );
3289 cblas_cgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3290 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3310 template<
typename VT1
3314 static inline typename EnableIf< UseDoublePrecisionComplexKernel<VT1,VT2,MT1> >::Type
3315 selectBlasSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A, ST2 scalar )
3317 using boost::numeric_cast;
3326 const int M ( numeric_cast<int>( A.rows() ) );
3327 const int N ( numeric_cast<int>( A.columns() ) );
3328 const int lda( numeric_cast<int>( A.spacing() ) );
3329 const complex<double> alpha( -scalar );
3330 const complex<double> beta ( 1.0, 0.0 );
3332 cblas_zgemv( CblasRowMajor, CblasTrans, M, N, &alpha,
3333 A.data(), lda, x.data(), 1, &beta, y.data(), 1 );
3354 template<
typename VT1 >
3355 friend inline void multAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3388 template<
typename VT1 >
3389 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3390 smpAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3396 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3397 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3399 if( right.rows() == 0UL ) {
3403 else if( right.columns() == 0UL ) {
3433 template<
typename VT1 >
3434 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3435 smpAssign( SparseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3464 template<
typename VT1 >
3465 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3466 smpAddAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3472 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3473 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3475 if( right.rows() == 0UL || right.columns() == 0UL ) {
3509 template<
typename VT1 >
3510 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3511 smpSubAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3517 typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3518 typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3520 if( right.rows() == 0UL || right.columns() == 0UL ) {
3555 template<
typename VT1 >
3556 friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
3557 smpMultAssign( DenseVector<VT1,true>& lhs,
const DVecScalarMultExpr& rhs )
3630 template<
typename T1
3632 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
3637 if( (~vec).
size() != (~mat).
rows() )
3638 throw std::invalid_argument(
"Vector and matrix sizes do not match" );
3666 template<
typename T1
3669 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
3689 template<
typename VT,
typename MT,
bool AF >
3694 typedef typename MultExprTrait< VT, typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:377
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:111
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4329
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:247
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:115
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:387
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
void smpMultAssign(DenseVector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:179
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:343
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
Header file for the DenseVector base class.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the RequiresEvaluation type trait.
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:296
Header file for the VecScalarMultExpr base class.
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:257
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:242
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:251
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:253
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:244
Header file for the multiplication trait.
Header file for the IsDouble type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:246
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:355
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:281
Header file for the IsMatMatMultExpr type trait class.
const size_t SMP_TDVECDMATMULT_THRESHOLD
SMP dense vector/row-major dense matrix multiplication threshold. This threshold specifies when a dens...
Definition: Thresholds.h:368
Header file for the IsBlasCompatible type trait.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Constraint on the data type.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:367
void multAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the multiplication assignment of a matrix to a matrix.
Definition: Matrix.h:361
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the serial shim.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:323
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:110
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:112
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:254
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:333
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:113
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
Header file for the TVecMatMultExpr base class.
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:398
Expression object for transpose dense vector-dense matrix multiplications. The TDVecDMatMultExpr class...
Definition: Forward.h:133
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:245
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:260
Header file for all intrinsic functionality.
const size_t end_
End of the unrolled calculation loop.
Definition: TDVecDMatMultExpr.h:399
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
const size_t TDVECDMATMULT_THRESHOLD
Dense Vector/row-major dense matrix multiplication threshold. This setting specifies the threshold bet...
Definition: Thresholds.h:91
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:114
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a row dense or sparse vector type (i...
Definition: TransposeFlag.h:81
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:248
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:397
Constraint on the data type.
Size type of the Blaze library.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
size_t rows(const Matrix< MT, SO > &m)
Returns the current number of rows of the matrix.
Definition: Matrix.h:154
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:243
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.