35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
123 template<
typename MT1
125 class TDMatTSMatMultExpr :
public DenseMatrix< TDMatTSMatMultExpr<MT1,MT2>, true >
126 ,
private MatMatMultExpr
127 ,
private Computation
155 template<
typename T1,
typename T2,
typename T3 >
156 struct IsEvaluationRequired {
157 enum { value = ( evaluateLeft || evaluateRight ) };
167 template<
typename T1,
typename T2,
typename T3 >
168 struct UseVectorizedKernel {
170 !IsDiagonal<T2>::value &&
171 T1::vectorizable && T2::vectorizable &&
172 IsColumnMajorMatrix<T1>::value &&
173 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
174 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
175 IntrinsicTrait<typename T1::ElementType>::addition &&
176 IntrinsicTrait<typename T1::ElementType>::subtraction &&
177 IntrinsicTrait<typename T1::ElementType>::multiplication };
188 template<
typename T1,
typename T2,
typename T3 >
189 struct UseOptimizedKernel {
191 !UseVectorizedKernel<T1,T2,T3>::value &&
192 !IsDiagonal<T2>::value &&
193 !IsResizable<typename T1::ElementType>::value &&
194 !IsResizable<ET2>::value };
204 template<
typename T1,
typename T2,
typename T3 >
205 struct UseDefaultKernel {
206 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
207 !UseOptimizedKernel<T1,T2,T3>::value };
245 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
246 !evaluateRight && MT2::smpAssignable };
279 if(
lhs_.columns() == 0UL )
294 if( element != end ) {
295 tmp =
lhs_(i,element->index()) * element->value();
297 for( ; element!=
end; ++element ) {
298 tmp +=
lhs_(i,element->index()) * element->value();
321 :(
lhs_.columns() ) ) );
324 tmp =
lhs_(i,kbegin) *
rhs_(kbegin,j);
325 for(
size_t k=kbegin+1UL; k<kend; ++k ) {
343 inline ReturnType
at(
size_t i,
size_t j )
const {
344 if( i >=
lhs_.rows() ) {
347 if( j >=
rhs_.columns() ) {
370 return rhs_.columns();
400 template<
typename T >
402 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
412 template<
typename T >
414 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
424 return lhs_.isAligned();
434 return (
columns() > SMP_TDMATTSMATMULT_THRESHOLD );
457 template<
typename MT
466 LT A(
serial( rhs.lhs_ ) );
467 RT B(
serial( rhs.rhs_ ) );
476 TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
495 template<
typename MT3
499 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
507 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
509 const size_t itmp(
min( ii+block, A.rows() ) );
511 for(
size_t j=0UL; j<B.columns(); ++j )
513 ConstIterator element( B.begin(j) );
514 const ConstIterator
end( B.end(j) );
516 for( ; element!=
end; ++element )
518 const size_t j1( element->index() );
522 C(j1,j) = A(j1,j1) * element->value();
526 const size_t ibegin( ( IsLower<MT4>::value )
527 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
529 const size_t iend( ( IsUpper<MT4>::value )
530 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
533 if( IsTriangular<MT4>::value && ibegin >= iend )
538 for(
size_t i=ibegin; i<iend; ++i ) {
540 C(i,j) = A(i,j1) * element->value();
542 C(i,j) += A(i,j1) * element->value();
566 template<
typename MT3
569 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
570 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
574 const size_t block( IsColumnMajorMatrix<MT3>::value ? A.rows() : 64UL );
578 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
580 const size_t itmp(
min( ii+block, A.rows() ) );
582 for(
size_t j=0UL; j<B.columns(); ++j )
584 const ConstIterator
end( B.end(j) );
585 ConstIterator element( B.begin(j) );
587 const size_t nonzeros( B.nonZeros(j) );
588 const size_t kpos( nonzeros &
size_t(-4) );
591 for(
size_t k=0UL; k<kpos; k+=4UL )
593 const size_t j1( element->index() );
594 const ET2 v1( element->value() );
596 const size_t j2( element->index() );
597 const ET2 v2( element->value() );
599 const size_t j3( element->index() );
600 const ET2 v3( element->value() );
602 const size_t j4( element->index() );
603 const ET2 v4( element->value() );
608 const size_t ibegin( ( IsLower<MT4>::value )
609 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
611 const size_t iend( ( IsUpper<MT4>::value )
612 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL ) ) )
615 if( IsTriangular<MT4>::value && ibegin >= iend )
620 const size_t inum( iend - ibegin );
621 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
624 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
625 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
626 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
627 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
628 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
630 for(
size_t i=ipos; i<iend; ++i ) {
631 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
635 for( ; element!=
end; ++element )
637 const size_t j1( element->index() );
638 const ET2 v1( element->value() );
640 const size_t ibegin( ( IsLower<MT4>::value )
641 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
643 const size_t iend( ( IsUpper<MT4>::value )
644 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
647 if( IsTriangular<MT4>::value && ibegin >= iend )
652 const size_t inum( iend - ibegin );
653 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
656 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
657 C(i ,j) += A(i ,j1) * v1;
658 C(i+1UL,j) += A(i+1UL,j1) * v1;
659 C(i+2UL,j) += A(i+2UL,j1) * v1;
660 C(i+3UL,j) += A(i+3UL,j1) * v1;
662 for(
size_t i=ipos; i<iend; ++i ) {
663 C(i,j) += A(i,j1) * v1;
686 template<
typename MT3
689 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
690 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
692 typedef IntrinsicTrait<ElementType> IT;
695 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
699 for(
size_t j=0UL; j<B.columns(); ++j )
701 const ConstIterator
end( B.end(j) );
702 ConstIterator element( B.begin(j) );
704 const size_t nonzeros( B.nonZeros(j) );
705 const size_t kpos( nonzeros &
size_t(-4) );
708 for(
size_t k=0UL; k<kpos; k+=4UL )
710 const size_t j1( element->index() );
711 const ET2 v1( element->value() );
713 const size_t j2( element->index() );
714 const ET2 v2( element->value() );
716 const size_t j3( element->index() );
717 const ET2 v3( element->value() );
719 const size_t j4( element->index() );
720 const ET2 v4( element->value() );
725 const IntrinsicType xmm1(
set( v1 ) );
726 const IntrinsicType xmm2(
set( v2 ) );
727 const IntrinsicType xmm3(
set( v3 ) );
728 const IntrinsicType xmm4(
set( v4 ) );
730 const size_t ibegin( ( IsLower<MT4>::value )
731 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
733 const size_t iend( ( IsUpper<MT4>::value )
734 ?( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL )
738 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
744 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
746 for( ; remainder && i<iend; ++i ) {
747 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
751 for( ; element!=
end; ++element )
753 const size_t j1( element->index() );
754 const ET2 v1( element->value() );
756 const IntrinsicType xmm1(
set( v1 ) );
758 const size_t ibegin( ( IsLower<MT4>::value )
759 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
761 const size_t iend( ( IsUpper<MT4>::value )
762 ?( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL )
766 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
772 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
774 for( ; remainder && i<iend; ++i ) {
775 C(i,j) += A(i,j1) * v1;
796 template<
typename MT
802 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
814 const TmpType tmp(
serial( rhs ) );
833 template<
typename MT
835 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const TDMatTSMatMultExpr& rhs )
842 LT A(
serial( rhs.lhs_ ) );
843 RT B(
serial( rhs.rhs_ ) );
852 TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
871 template<
typename MT3
874 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
875 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
879 const size_t block( Or< IsColumnMajorMatrix<MT3>, IsDiagonal<MT4> >::value ? A.rows() : 64UL );
881 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
883 const size_t itmp(
min( ii+block, A.rows() ) );
885 for(
size_t j=0UL; j<B.columns(); ++j )
887 ConstIterator element( B.begin(j) );
888 const ConstIterator
end( B.end(j) );
890 for( ; element!=
end; ++element )
892 const size_t j1( element->index() );
894 if( IsDiagonal<MT4>::value )
896 C(j1,j) += A(j1,j1) * element->value();
900 const size_t ibegin( ( IsLower<MT4>::value )
901 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
903 const size_t iend( ( IsUpper<MT4>::value )
904 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
907 if( IsTriangular<MT4>::value && ibegin >= iend )
912 const size_t inum( iend - ibegin );
913 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
916 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
917 C(i ,j) += A(i ,j1) * element->value();
918 C(i+1UL,j) += A(i+1UL,j1) * element->value();
919 C(i+2UL,j) += A(i+2UL,j1) * element->value();
920 C(i+3UL,j) += A(i+3UL,j1) * element->value();
922 for(
size_t i=ipos; i<iend; ++i ) {
923 C(i,j) += A(i,j1) * element->value();
947 template<
typename MT3
950 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
951 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
955 const size_t block( IsColumnMajorMatrix<MT3>::value ? A.rows() : 64UL );
957 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
959 const size_t itmp(
min( ii+block, A.rows() ) );
961 for(
size_t j=0UL; j<B.columns(); ++j )
963 const ConstIterator
end( B.end(j) );
964 ConstIterator element( B.begin(j) );
966 const size_t nonzeros( B.nonZeros(j) );
967 const size_t kpos( nonzeros &
size_t(-4) );
970 for(
size_t k=0UL; k<kpos; k+=4UL )
972 const size_t j1( element->index() );
973 const ET2 v1( element->value() );
975 const size_t j2( element->index() );
976 const ET2 v2( element->value() );
978 const size_t j3( element->index() );
979 const ET2 v3( element->value() );
981 const size_t j4( element->index() );
982 const ET2 v4( element->value() );
987 const size_t ibegin( ( IsLower<MT4>::value )
988 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
990 const size_t iend( ( IsUpper<MT4>::value )
991 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL ) ) )
994 if( IsTriangular<MT4>::value && ibegin >= iend )
999 const size_t inum( iend - ibegin );
1000 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1003 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1004 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1005 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1006 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1007 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1009 for(
size_t i=ipos; i<iend; ++i ) {
1010 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1014 for( ; element!=
end; ++element )
1016 const size_t j1( element->index() );
1017 const ET2 v1( element->value() );
1019 const size_t ibegin( ( IsLower<MT4>::value )
1020 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
1022 const size_t iend( ( IsUpper<MT4>::value )
1023 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
1026 if( IsTriangular<MT4>::value && ibegin >= iend )
1031 const size_t inum( iend - ibegin );
1032 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1035 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1036 C(i ,j) += A(i ,j1) * v1;
1037 C(i+1UL,j) += A(i+1UL,j1) * v1;
1038 C(i+2UL,j) += A(i+2UL,j1) * v1;
1039 C(i+3UL,j) += A(i+3UL,j1) * v1;
1041 for(
size_t i=ipos; i<iend; ++i ) {
1042 C(i,j) += A(i,j1) * v1;
1065 template<
typename MT3
1068 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1069 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1071 typedef IntrinsicTrait<ElementType> IT;
1074 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
1076 for(
size_t j=0UL; j<B.columns(); ++j )
1078 const ConstIterator
end( B.end(j) );
1079 ConstIterator element( B.begin(j) );
1081 const size_t nonzeros( B.nonZeros(j) );
1082 const size_t kpos( nonzeros &
size_t(-4) );
1085 for(
size_t k=0UL; k<kpos; k+=4UL )
1087 const size_t j1( element->index() );
1088 const ET2 v1( element->value() );
1090 const size_t j2( element->index() );
1091 const ET2 v2( element->value() );
1093 const size_t j3( element->index() );
1094 const ET2 v3( element->value() );
1096 const size_t j4( element->index() );
1097 const ET2 v4( element->value() );
1102 const IntrinsicType xmm1(
set( v1 ) );
1103 const IntrinsicType xmm2(
set( v2 ) );
1104 const IntrinsicType xmm3(
set( v3 ) );
1105 const IntrinsicType xmm4(
set( v4 ) );
1107 const size_t ibegin( ( IsLower<MT4>::value )
1108 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
1110 const size_t iend( ( IsUpper<MT4>::value )
1111 ?( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL )
1115 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1121 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1123 for( ; remainder && i<iend; ++i ) {
1124 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1128 for( ; element!=
end; ++element )
1130 const size_t j1( element->index() );
1131 const ET2 v1( element->value() );
1133 const IntrinsicType xmm1(
set( v1 ) );
1135 const size_t ibegin( ( IsLower<MT4>::value )
1136 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
1138 const size_t iend( ( IsUpper<MT4>::value )
1139 ?( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL )
1143 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1149 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
1151 for( ; remainder && i<iend; ++i ) {
1152 C(i,j) += A(i,j1) * v1;
1177 template<
typename MT
1179 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const TDMatTSMatMultExpr& rhs )
1186 LT A(
serial( rhs.lhs_ ) );
1187 RT B(
serial( rhs.rhs_ ) );
1196 TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1215 template<
typename MT3
1218 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1219 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1223 const size_t block( Or< IsColumnMajorMatrix<MT3>, IsDiagonal<MT4> >::value ? A.rows() : 64UL );
1225 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1227 const size_t itmp(
min( ii+block, A.rows() ) );
1229 for(
size_t j=0UL; j<B.columns(); ++j )
1231 ConstIterator element( B.begin(j) );
1232 const ConstIterator
end( B.end(j) );
1234 for( ; element!=
end; ++element )
1236 const size_t j1( element->index() );
1238 if( IsDiagonal<MT4>::value )
1240 C(j1,j) -= A(j1,j1) * element->value();
1244 const size_t ibegin( ( IsLower<MT4>::value )
1245 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
1247 const size_t iend( ( IsUpper<MT4>::value )
1248 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
1251 if( IsTriangular<MT4>::value && ibegin >= iend )
1256 const size_t inum( iend - ibegin );
1257 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1260 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1261 C(i ,j) -= A(i ,j1) * element->value();
1262 C(i+1UL,j) -= A(i+1UL,j1) * element->value();
1263 C(i+2UL,j) -= A(i+2UL,j1) * element->value();
1264 C(i+3UL,j) -= A(i+3UL,j1) * element->value();
1266 for(
size_t i=ipos; i<iend; ++i ) {
1267 C(i,j) -= A(i,j1) * element->value();
1291 template<
typename MT3
1294 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1295 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1299 const size_t block( IsColumnMajorMatrix<MT3>::value ? A.rows() : 64UL );
1301 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1303 const size_t itmp(
min( ii+block, A.rows() ) );
1305 for(
size_t j=0UL; j<B.columns(); ++j )
1307 const ConstIterator
end( B.end(j) );
1308 ConstIterator element( B.begin(j) );
1310 const size_t nonzeros( B.nonZeros(j) );
1311 const size_t kpos( nonzeros &
size_t(-4) );
1314 for(
size_t k=0UL; k<kpos; k+=4UL )
1316 const size_t j1( element->index() );
1317 const ET2 v1( element->value() );
1319 const size_t j2( element->index() );
1320 const ET2 v2( element->value() );
1322 const size_t j3( element->index() );
1323 const ET2 v3( element->value() );
1325 const size_t j4( element->index() );
1326 const ET2 v4( element->value() );
1331 const size_t ibegin( ( IsLower<MT4>::value )
1332 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
1334 const size_t iend( ( IsUpper<MT4>::value )
1335 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL ) ) )
1338 if( IsTriangular<MT4>::value && ibegin >= iend )
1343 const size_t inum( iend - ibegin );
1344 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1347 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1348 C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1349 C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1350 C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1351 C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1353 for(
size_t i=ipos; i<iend; ++i ) {
1354 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1358 for( ; element!=
end; ++element )
1360 const size_t j1( element->index() );
1361 const ET2 v1( element->value() );
1363 const size_t ibegin( ( IsLower<MT4>::value )
1364 ?(
max( ii, ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) ) )
1366 const size_t iend( ( IsUpper<MT4>::value )
1367 ?(
min( itmp, ( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL ) ) )
1370 if( IsTriangular<MT4>::value && ibegin >= iend )
1375 const size_t inum( iend - ibegin );
1376 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1379 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1380 C(i ,j) -= A(i ,j1) * v1;
1381 C(i+1UL,j) -= A(i+1UL,j1) * v1;
1382 C(i+2UL,j) -= A(i+2UL,j1) * v1;
1383 C(i+3UL,j) -= A(i+3UL,j1) * v1;
1385 for(
size_t i=ipos; i<iend; ++i ) {
1386 C(i,j) -= A(i,j1) * v1;
1409 template<
typename MT3
1412 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1413 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1415 typedef IntrinsicTrait<ElementType> IT;
1418 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
1420 for(
size_t j=0UL; j<B.columns(); ++j )
1422 const ConstIterator
end( B.end(j) );
1423 ConstIterator element( B.begin(j) );
1425 const size_t nonzeros( B.nonZeros(j) );
1426 const size_t kpos( nonzeros &
size_t(-4) );
1429 for(
size_t k=0UL; k<kpos; k+=4UL )
1431 const size_t j1( element->index() );
1432 const ET2 v1( element->value() );
1434 const size_t j2( element->index() );
1435 const ET2 v2( element->value() );
1437 const size_t j3( element->index() );
1438 const ET2 v3( element->value() );
1440 const size_t j4( element->index() );
1441 const ET2 v4( element->value() );
1446 const IntrinsicType xmm1(
set( v1 ) );
1447 const IntrinsicType xmm2(
set( v2 ) );
1448 const IntrinsicType xmm3(
set( v3 ) );
1449 const IntrinsicType xmm4(
set( v4 ) );
1451 const size_t ibegin( ( IsLower<MT4>::value )
1452 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
1454 const size_t iend( ( IsUpper<MT4>::value )
1455 ?( IsStrictlyUpper<MT4>::value ? j4 : j4+1UL )
1459 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1465 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1467 for( ; remainder && i<iend; ++i ) {
1468 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1472 for( ; element!=
end; ++element )
1474 const size_t j1( element->index() );
1475 const ET2 v1( element->value() );
1477 const IntrinsicType xmm1(
set( v1 ) );
1479 const size_t ibegin( ( IsLower<MT4>::value )
1480 ?( ( IsStrictlyLower<MT4>::value ? j1+1UL : j1 ) &
size_t(-
IT::size) )
1482 const size_t iend( ( IsUpper<MT4>::value )
1483 ?( IsStrictlyUpper<MT4>::value ? j1 : j1+1UL )
1487 const size_t ipos( remainder ? ( iend &
size_t(-
IT::size) ) : iend );
1493 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 );
1495 for( ; remainder && i<iend; ++i ) {
1496 C(i,j) -= A(i,j1) * v1;
1531 template<
typename MT
1533 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1571 template<
typename MT
1573 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1578 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
1590 const TmpType tmp( rhs );
1611 template<
typename MT
1613 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1655 template<
typename MT
1657 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1742 template<
typename T1
1744 inline const TDMatTSMatMultExpr<T1,T2>
1768 template<
typename MT1,
typename MT2 >
1785 template<
typename MT1,
typename MT2 >
1802 template<
typename MT1,
typename MT2 >
1803 struct IsAligned<
TDMatTSMatMultExpr<MT1,MT2> > :
public IsTrue< IsAligned<MT1>::value >
1819 template<
typename MT1,
typename MT2 >
1821 :
public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
1837 template<
typename MT1,
typename MT2 >
1839 :
public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
1855 template<
typename MT1,
typename MT2 >
1857 :
public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
1858 , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
1874 template<
typename MT1,
typename MT2 >
1876 :
public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
1892 template<
typename MT1,
typename MT2 >
1894 :
public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
1910 template<
typename MT1,
typename MT2 >
1912 :
public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
1913 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
1929 template<
typename MT1,
typename MT2,
typename VT >
1934 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1935 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
1936 IsDenseVector<VT>::value && IsColumnVector<VT>::value
1937 ,
typename TDMatDVecMultExprTrait< MT1, typename TSMatDVecMultExprTrait<MT2,VT>::Type >::Type
1938 , INVALID_TYPE >::Type Type;
1947 template<
typename MT1,
typename MT2,
typename VT >
1952 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1953 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
1954 IsSparseVector<VT>::value && IsColumnVector<VT>::value
1955 ,
typename TDMatDVecMultExprTrait< MT1, typename TSMatDVecMultExprTrait<MT2,VT>::Type >::Type
1956 , INVALID_TYPE >::Type Type;
1965 template<
typename VT,
typename MT1,
typename MT2 >
1970 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
1971 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1972 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
1973 ,
typename TDVecTSMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1974 , INVALID_TYPE >::Type Type;
1983 template<
typename VT,
typename MT1,
typename MT2 >
1988 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
1989 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1990 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
1991 ,
typename TDVecTSMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1992 , INVALID_TYPE >::Type Type;
2001 template<
typename MT1,
typename MT2,
bool AF >
2006 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
2007 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
2016 template<
typename MT1,
typename MT2 >
2021 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
2030 template<
typename MT1,
typename MT2 >
2035 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
TDMatTSMatMultExpr< MT1, MT2 > This
Type of this TDMatTSMatMultExpr instance.
Definition: TDMatTSMatMultExpr.h:214
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:7820
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:250
RT2::ElementType ET2
Element type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:134
Expression object for transpose dense matrix-transpose sparse matrix multiplications. The TDMatTSMatMultExpr class represents the compile time expression for multiplications between a column-major dense matrix and a column-major sparse matrix.
Definition: Forward.h:145
Header file for the IsSparseMatrix type trait.
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
MT2::CompositeType CT2
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:136
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:215
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
Header file for the And class template.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:227
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:220
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Header file for the IsUniLower type trait.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:233
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:117
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:547
Constraint on the data type.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:343
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTSMatMultExpr.h:369
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTSMatMultExpr.h:221
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTSMatMultExpr.h:230
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTSMatMultExpr.h:219
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:217
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:79
Header file for the TSVecTDMatMultExprTrait class template.
TDMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTSMatMultExpr class.
Definition: TDMatTSMatMultExpr.h:255
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:2592
Header file for the Or class template.
Header file for the TDMatSVecMultExprTrait class template.
Header file for the TDVecTSMatMultExprTrait class template.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the TSMatDVecMultExprTrait class template.
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTSMatMultExpr.h:401
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:440
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTSMatMultExpr.h:379
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:216
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTSMatMultExpr.h:413
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:79
Removal of reference modifiers. The RemoveReference type trait removes any reference modifiers from the given...
Definition: RemoveReference.h:69
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTSMatMultExpr.h:359
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:110
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTSMatMultExpr.h:433
Base template for the MultTrait class.
Definition: MultTrait.h:138
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTSMatMultExpr.h:423
Substitution Failure Is Not An Error (SFINAE) class. The EnableIf class template is an auxiliary tool ...
Definition: EnableIf.h:184
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Header file for the reset shim.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:131
Header file for the isDefault shim.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:135
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:133
Constraints on the storage order of matrix types.
Header file for the RemoveReference type trait.
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:122
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTSMatMultExpr.h:218
Header file for the IsComputation type trait class.
MT2::ResultType RT2
Result type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:132
Compile time logical or evaluation. The Or class template performs at compile time a logical or ('||')...
Definition: Or.h:78
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:270
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
Header file for the IsTrue value trait.
RightOperand rightOperand() const
Returns the right-hand side transpose sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:389
Header file for the IsUpper type trait.
Header file for exception macros.
Header file for the IsColumnVector type trait.
Constraint on the data type.
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:441
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:224
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:79
Header file for the IsExpression type trait class.
Header file for the TSMatSVecMultExprTrait class template.
Header file for the FunctionTrace class.