35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTSMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTSMATMULTEXPR_H_
124 template<
typename MT1
157 template<
typename T1,
typename T2,
typename T3 >
158 struct CanExploitSymmetry {
159 enum { value = IsSymmetric<T2>::value };
170 template<
typename T1,
typename T2,
typename T3 >
171 struct IsEvaluationRequired {
172 enum { value = ( evaluateLeft || evaluateRight ) &&
173 !CanExploitSymmetry<T1,T2,T3>::value };
183 template<
typename T1,
typename T2,
typename T3 >
184 struct UseOptimizedKernel {
186 !IsDiagonal<T2>::value &&
187 !IsResizable<typename T1::ElementType>::value &&
188 !IsResizable<ET2>::value };
218 enum { vectorizable = 0 };
221 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
222 !evaluateRight && MT2::smpAssignable };
255 if(
lhs_.columns() == 0UL )
270 if( element != end ) {
271 tmp =
lhs_(i,element->index()) * element->value();
273 for( ; element!=
end; ++element ) {
274 tmp +=
lhs_(i,element->index()) * element->value();
297 :(
lhs_.columns() ) ) );
300 tmp =
lhs_(i,kbegin) *
rhs_(kbegin,j);
301 for(
size_t k=kbegin+1UL; k<kend; ++k ) {
319 inline ReturnType
at(
size_t i,
size_t j )
const {
320 if( i >=
lhs_.rows() ) {
323 if( j >=
rhs_.columns() ) {
346 return rhs_.columns();
376 template<
typename T >
378 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
388 template<
typename T >
390 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
400 return lhs_.isAligned();
410 return (
rows() > SMP_DMATTSMATMULT_THRESHOLD );
433 template<
typename MT
450 DMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
469 template<
typename MT3
473 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
479 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
481 const size_t jend(
min( jj+block, B.columns() ) );
485 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
486 for(
size_t j=jj; j<jend; ++j )
495 if( element ==
end ) {
503 C(i ,j) = A(i ,element->index()) * element->value();
504 C(i+1UL,j) = A(i+1UL,element->index()) * element->value();
505 C(i+2UL,j) = A(i+2UL,element->index()) * element->value();
506 C(i+3UL,j) = A(i+3UL,element->index()) * element->value();
508 for( ; element!=
end; ++element ) {
509 C(i ,j) += A(i ,element->index()) * element->value();
510 C(i+1UL,j) += A(i+1UL,element->index()) * element->value();
511 C(i+2UL,j) += A(i+2UL,element->index()) * element->value();
512 C(i+3UL,j) += A(i+3UL,element->index()) * element->value();
517 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
518 for(
size_t j=jj; j<jend; ++j )
520 ConstIterator element( ( IsUpper<MT4>::value )
521 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
523 const ConstIterator
end( ( IsLower<MT4>::value )
524 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
527 if( element ==
end ) {
533 C(i ,j) = A(i ,element->index()) * element->value();
534 C(i+1UL,j) = A(i+1UL,element->index()) * element->value();
536 for( ; element!=
end; ++element ) {
537 C(i ,j) += A(i ,element->index()) * element->value();
538 C(i+1UL,j) += A(i+1UL,element->index()) * element->value();
543 for( ; i<A.rows(); ++i ) {
544 for(
size_t j=jj; j<jend; ++j )
546 ConstIterator element( ( IsUpper<MT4>::value )
547 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
549 const ConstIterator
end( ( IsLower<MT4>::value )
550 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
553 if( element ==
end ) {
558 C(i,j) = A(i,element->index()) * element->value();
560 for( ; element!=
end; ++element )
561 C(i,j) += A(i,element->index()) * element->value();
583 template<
typename MT3
586 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
587 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
591 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 256UL );
595 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
597 const size_t jend(
min( jj+block, B.columns() ) );
601 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
602 for(
size_t j=jj; j<jend; ++j )
604 ConstIterator element( ( IsUpper<MT4>::value )
605 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
607 const ConstIterator
end( ( IsLower<MT4>::value )
608 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+4UL,j) : B.upperBound(i+4UL,j) )
611 const size_t nonzeros(
end - element );
612 const size_t kpos( nonzeros &
size_t(-4) );
615 for(
size_t k=0UL; k<kpos; k+=4UL )
617 const size_t j1( element->index() );
618 const ET2 v1( element->value() );
620 const size_t j2( element->index() );
621 const ET2 v2( element->value() );
623 const size_t j3( element->index() );
624 const ET2 v3( element->value() );
626 const size_t j4( element->index() );
627 const ET2 v4( element->value() );
632 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
633 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
634 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
635 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
638 for( ; element!=
end; ++element )
640 const size_t j1( element->index() );
641 const ET2 v1( element->value() );
643 C(i ,j) += A(i ,j1) * v1;
644 C(i+1UL,j) += A(i+1UL,j1) * v1;
645 C(i+2UL,j) += A(i+2UL,j1) * v1;
646 C(i+3UL,j) += A(i+3UL,j1) * v1;
651 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
652 for(
size_t j=jj; j<jend; ++j )
654 ConstIterator element( ( IsUpper<MT4>::value )
655 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
657 const ConstIterator
end( ( IsLower<MT4>::value )
658 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
661 const size_t nonzeros(
end - element );
662 const size_t kpos( nonzeros &
size_t(-4) );
665 for(
size_t k=0UL; k<kpos; k+=4UL )
667 const size_t j1( element->index() );
668 const ET2 v1( element->value() );
670 const size_t j2( element->index() );
671 const ET2 v2( element->value() );
673 const size_t j3( element->index() );
674 const ET2 v3( element->value() );
676 const size_t j4( element->index() );
677 const ET2 v4( element->value() );
682 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
683 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
686 for( ; element!=
end; ++element )
688 const size_t j1( element->index() );
689 const ET2 v1( element->value() );
691 C(i ,j) += A(i ,j1) * v1;
692 C(i+1UL,j) += A(i+1UL,j1) * v1;
697 for( ; i<A.rows(); ++i ) {
698 for(
size_t j=jj; j<jend; ++j )
700 ConstIterator element( ( IsUpper<MT4>::value )
701 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
703 const ConstIterator
end( ( IsLower<MT4>::value )
704 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
707 const size_t nonzeros(
end - element );
708 const size_t kpos( nonzeros &
size_t(-4) );
711 for(
size_t k=0UL; k<kpos; k+=4UL )
713 const size_t j1( element->index() );
714 const ET2 v1( element->value() );
716 const size_t j2( element->index() );
717 const ET2 v2( element->value() );
719 const size_t j3( element->index() );
720 const ET2 v3( element->value() );
722 const size_t j4( element->index() );
723 const ET2 v4( element->value() );
728 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
731 for( ; element!=
end; ++element )
733 const size_t j1( element->index() );
734 const ET2 v1( element->value() );
736 C(i,j) += A(i,j1) * v1;
758 template<
typename MT
760 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
765 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
777 const TmpType tmp(
serial( rhs ) );
798 template<
typename MT
800 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
808 assign( ~lhs,
trans( rhs.lhs_ ) * rhs.rhs_ );
826 template<
typename MT
828 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
836 LT A(
serial( rhs.lhs_ ) );
837 RT B(
serial( rhs.rhs_ ) );
843 DMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
862 template<
typename MT3
865 static inline typename DisableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
866 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
870 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 256UL );
872 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
874 const size_t jend(
min( jj+block, B.columns() ) );
878 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
879 for(
size_t j=jj; j<jend; ++j )
881 ConstIterator element( ( IsUpper<MT4>::value )
882 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
884 const ConstIterator
end( ( IsLower<MT4>::value )
885 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+4UL,j) : B.upperBound(i+4UL,j) )
888 for( ; element!=
end; ++element ) {
889 C(i ,j) += A(i ,element->index()) * element->value();
890 C(i+1UL,j) += A(i+1UL,element->index()) * element->value();
891 C(i+2UL,j) += A(i+2UL,element->index()) * element->value();
892 C(i+3UL,j) += A(i+3UL,element->index()) * element->value();
897 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
898 for(
size_t j=jj; j<jend; ++j )
900 ConstIterator element( ( IsUpper<MT4>::value )
901 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
903 const ConstIterator
end( ( IsLower<MT4>::value )
904 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
907 for( ; element!=
end; ++element ) {
908 C(i ,j) += A(i ,element->index()) * element->value();
909 C(i+1UL,j) += A(i+1UL,element->index()) * element->value();
914 for( ; i<A.rows(); ++i ) {
915 for(
size_t j=jj; j<jend; ++j )
917 ConstIterator element( ( IsUpper<MT4>::value )
918 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
920 const ConstIterator
end( ( IsLower<MT4>::value )
921 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
924 for( ; element!=
end; ++element )
925 C(i,j) += A(i,element->index()) * element->value();
947 template<
typename MT3
950 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
951 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
955 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 256UL );
957 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
959 const size_t jend(
min( jj+block, B.columns() ) );
963 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
964 for(
size_t j=jj; j<jend; ++j )
966 ConstIterator element( ( IsUpper<MT4>::value )
967 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
969 const ConstIterator
end( ( IsLower<MT4>::value )
970 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+4UL,j) : B.upperBound(i+4UL,j) )
973 const size_t nonzeros(
end - element );
974 const size_t kpos( nonzeros &
size_t(-4) );
977 for(
size_t k=0UL; k<kpos; k+=4UL )
979 const size_t j1( element->index() );
980 const ET2 v1( element->value() );
982 const size_t j2( element->index() );
983 const ET2 v2( element->value() );
985 const size_t j3( element->index() );
986 const ET2 v3( element->value() );
988 const size_t j4( element->index() );
989 const ET2 v4( element->value() );
994 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
995 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
996 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
997 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1000 for( ; element!=
end; ++element )
1002 const size_t j1( element->index() );
1003 const ET2 v1( element->value() );
1005 C(i ,j) += A(i ,j1) * v1;
1006 C(i+1UL,j) += A(i+1UL,j1) * v1;
1007 C(i+2UL,j) += A(i+2UL,j1) * v1;
1008 C(i+3UL,j) += A(i+3UL,j1) * v1;
1013 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
1014 for(
size_t j=jj; j<jend; ++j )
1016 ConstIterator element( ( IsUpper<MT4>::value )
1017 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1019 const ConstIterator
end( ( IsLower<MT4>::value )
1020 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
1023 const size_t nonzeros(
end - element );
1024 const size_t kpos( nonzeros &
size_t(-4) );
1027 for(
size_t k=0UL; k<kpos; k+=4UL )
1029 const size_t j1( element->index() );
1030 const ET2 v1( element->value() );
1032 const size_t j2( element->index() );
1033 const ET2 v2( element->value() );
1035 const size_t j3( element->index() );
1036 const ET2 v3( element->value() );
1038 const size_t j4( element->index() );
1039 const ET2 v4( element->value() );
1044 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1045 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1048 for( ; element!=
end; ++element )
1050 const size_t j1( element->index() );
1051 const ET2 v1( element->value() );
1053 C(i ,j) += A(i ,j1) * v1;
1054 C(i+1UL,j) += A(i+1UL,j1) * v1;
1059 for( ; i<A.rows(); ++i ) {
1060 for(
size_t j=jj; j<jend; ++j )
1062 ConstIterator element( ( IsUpper<MT4>::value )
1063 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1065 const ConstIterator
end( ( IsLower<MT4>::value )
1066 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
1069 const size_t nonzeros(
end - element );
1070 const size_t kpos( nonzeros &
size_t(-4) );
1073 for(
size_t k=0UL; k<kpos; k+=4UL )
1075 const size_t j1( element->index() );
1076 const ET2 v1( element->value() );
1078 const size_t j2( element->index() );
1079 const ET2 v2( element->value() );
1081 const size_t j3( element->index() );
1082 const ET2 v3( element->value() );
1084 const size_t j4( element->index() );
1085 const ET2 v4( element->value() );
1090 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1093 for( ; element!=
end; ++element )
1095 const size_t j1( element->index() );
1096 const ET2 v1( element->value() );
1098 C(i,j) += A(i,j1) * v1;
1122 template<
typename MT
1124 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1134 addAssign( ~lhs,
trans( rhs.lhs_ ) * rhs.rhs_ );
1156 template<
typename MT
1158 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1166 LT A(
serial( rhs.lhs_ ) );
1167 RT B(
serial( rhs.rhs_ ) );
1173 DMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1192 template<
typename MT3
1195 static inline typename DisableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1196 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1200 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 256UL );
1202 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1204 const size_t jend(
min( jj+block, B.columns() ) );
1208 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
1209 for(
size_t j=jj; j<jend; ++j )
1211 ConstIterator element( ( IsUpper<MT4>::value )
1212 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1214 const ConstIterator
end( ( IsLower<MT4>::value )
1215 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+4UL,j) : B.upperBound(i+4UL,j) )
1218 for( ; element!=
end; ++element ) {
1219 C(i ,j) -= A(i ,element->index()) * element->value();
1220 C(i+1UL,j) -= A(i+1UL,element->index()) * element->value();
1221 C(i+2UL,j) -= A(i+2UL,element->index()) * element->value();
1222 C(i+3UL,j) -= A(i+3UL,element->index()) * element->value();
1227 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
1228 for(
size_t j=jj; j<jend; ++j )
1230 ConstIterator element( ( IsUpper<MT4>::value )
1231 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1233 const ConstIterator
end( ( IsLower<MT4>::value )
1234 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
1237 for( ; element!=
end; ++element ) {
1238 C(i ,j) -= A(i ,element->index()) * element->value();
1239 C(i+1UL,j) -= A(i+1UL,element->index()) * element->value();
1244 for( ; i<A.rows(); ++i ) {
1245 for(
size_t j=jj; j<jend; ++j )
1247 ConstIterator element( ( IsUpper<MT4>::value )
1248 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1250 const ConstIterator
end( ( IsLower<MT4>::value )
1251 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
1254 for( ; element!=
end; ++element )
1255 C(i,j) -= A(i,element->index()) * element->value();
1277 template<
typename MT3
1280 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1281 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1285 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 256UL );
1287 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1289 const size_t jend(
min( jj+block, B.columns() ) );
1293 for( ; (i+4UL) <= A.rows(); i+=4UL ) {
1294 for(
size_t j=jj; j<jend; ++j )
1296 ConstIterator element( ( IsUpper<MT4>::value )
1297 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1299 const ConstIterator
end( ( IsLower<MT4>::value )
1300 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+4UL,j) : B.upperBound(i+4UL,j) )
1303 const size_t nonzeros(
end - element );
1304 const size_t kpos( nonzeros &
size_t(-4) );
1307 for(
size_t k=0UL; k<kpos; k+=4UL )
1309 const size_t j1( element->index() );
1310 const ET2 v1( element->value() );
1312 const size_t j2( element->index() );
1313 const ET2 v2( element->value() );
1315 const size_t j3( element->index() );
1316 const ET2 v3( element->value() );
1318 const size_t j4( element->index() );
1319 const ET2 v4( element->value() );
1324 C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1325 C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1326 C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1327 C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1330 for( ; element!=
end; ++element )
1332 const size_t j1( element->index() );
1333 const ET2 v1( element->value() );
1335 C(i ,j) -= A(i ,j1) * v1;
1336 C(i+1UL,j) -= A(i+1UL,j1) * v1;
1337 C(i+2UL,j) -= A(i+2UL,j1) * v1;
1338 C(i+3UL,j) -= A(i+3UL,j1) * v1;
1343 for( ; (i+2UL) <= A.rows(); i+=2UL ) {
1344 for(
size_t j=jj; j<jend; ++j )
1346 ConstIterator element( ( IsUpper<MT4>::value )
1347 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1349 const ConstIterator
end( ( IsLower<MT4>::value )
1350 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i+2UL,j) : B.upperBound(i+2UL,j) )
1353 const size_t nonzeros(
end - element );
1354 const size_t kpos( nonzeros &
size_t(-4) );
1357 for(
size_t k=0UL; k<kpos; k+=4UL )
1359 const size_t j1( element->index() );
1360 const ET2 v1( element->value() );
1362 const size_t j2( element->index() );
1363 const ET2 v2( element->value() );
1365 const size_t j3( element->index() );
1366 const ET2 v3( element->value() );
1368 const size_t j4( element->index() );
1369 const ET2 v4( element->value() );
1374 C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1375 C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1378 for( ; element!=
end; ++element )
1380 const size_t j1( element->index() );
1381 const ET2 v1( element->value() );
1383 C(i ,j) -= A(i ,j1) * v1;
1384 C(i+1UL,j) -= A(i+1UL,j1) * v1;
1389 for( ; i<A.rows(); ++i ) {
1390 for(
size_t j=jj; j<jend; ++j )
1392 ConstIterator element( ( IsUpper<MT4>::value )
1393 ?( IsStrictlyUpper<MT4>::value ? B.upperBound(i,j) : B.lowerBound(i,j) )
1395 const ConstIterator
end( ( IsLower<MT4>::value )
1396 ?( IsStrictlyLower<MT4>::value ? B.lowerBound(i,j) : B.upperBound(i,j) )
1399 const size_t nonzeros(
end - element );
1400 const size_t kpos( nonzeros &
size_t(-4) );
1403 for(
size_t k=0UL; k<kpos; k+=4UL )
1405 const size_t j1( element->index() );
1406 const ET2 v1( element->value() );
1408 const size_t j2( element->index() );
1409 const ET2 v2( element->value() );
1411 const size_t j3( element->index() );
1412 const ET2 v3( element->value() );
1414 const size_t j4( element->index() );
1415 const ET2 v4( element->value() );
1420 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1423 for( ; element!=
end; ++element )
1425 const size_t j1( element->index() );
1426 const ET2 v1( element->value() );
1428 C(i,j) -= A(i,j1) * v1;
1452 template<
typename MT
1454 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1464 subAssign( ~lhs,
trans( rhs.lhs_ ) * rhs.rhs_ );
1496 template<
typename MT
1498 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1533 template<
typename MT
1535 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1540 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1552 const TmpType tmp( rhs );
1573 template<
typename MT
1575 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1606 template<
typename MT
1608 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1643 template<
typename MT
1645 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1680 template<
typename MT
1682 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1717 template<
typename MT
1719 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1797 template<
typename T1
1799 inline const DMatTSMatMultExpr<T1,T2>
1823 template<
typename MT1,
typename MT2 >
1840 template<
typename MT1,
typename MT2 >
1857 template<
typename MT1,
typename MT2 >
1858 struct IsAligned<
DMatTSMatMultExpr<MT1,MT2> > :
public IsTrue< IsAligned<MT1>::value >
1874 template<
typename MT1,
typename MT2 >
1876 :
public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
1892 template<
typename MT1,
typename MT2 >
1894 :
public IsTrue< IsUniLower<MT1>::value && IsUniLower<MT2>::value >
1910 template<
typename MT1,
typename MT2 >
1912 :
public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
1913 , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
1929 template<
typename MT1,
typename MT2 >
1931 :
public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
1947 template<
typename MT1,
typename MT2 >
1949 :
public IsTrue< IsUniUpper<MT1>::value && IsUniUpper<MT2>::value >
1965 template<
typename MT1,
typename MT2 >
1967 :
public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
1968 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
1984 template<
typename MT1,
typename MT2,
typename VT >
1989 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1990 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
1991 IsDenseVector<VT>::value && IsColumnVector<VT>::value
1992 ,
typename DMatDVecMultExprTrait< MT1, typename TSMatDVecMultExprTrait<MT2,VT>::Type >::Type
1993 , INVALID_TYPE >::Type Type;
2002 template<
typename MT1,
typename MT2,
typename VT >
2007 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
2008 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
2009 IsSparseVector<VT>::value && IsColumnVector<VT>::value
2010 ,
typename DMatSVecMultExprTrait< MT1, typename TSMatSVecMultExprTrait<MT2,VT>::Type >::Type
2011 , INVALID_TYPE >::Type Type;
2020 template<
typename VT,
typename MT1,
typename MT2 >
2025 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
2026 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
2027 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
2028 ,
typename TDVecTSMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
2029 , INVALID_TYPE >::Type Type;
2038 template<
typename VT,
typename MT1,
typename MT2 >
2043 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
2044 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
2045 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
2046 ,
typename TDVecTSMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
2047 , INVALID_TYPE >::Type Type;
2056 template<
typename MT1,
typename MT2,
bool AF >
2061 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
2062 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
2071 template<
typename MT1,
typename MT2 >
2076 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
2085 template<
typename MT1,
typename MT2 >
2090 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTSMatMultExpr.h:199
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTSMatMultExpr.h:246
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:7820
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTSMatMultExpr.h:204
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
MT2::CompositeType CT2
Composite type of the right-hand side sparse matrix expression.
Definition: DMatTSMatMultExpr.h:137
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:250
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTSMatMultExpr.h:377
Header file for the IsSparseMatrix type trait.
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTSMatMultExpr.h:198
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
Header file for the And class template.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTSMatMultExpr.h:355
Header file for the IsUniLower type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:117
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTSMatMultExpr.h:201
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTSMatMultExpr.h:399
DMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTSMatMultExpr class.
Definition: DMatTSMatMultExpr.h:231
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTSMatMultExpr.h:335
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:110
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:79
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTSMatMultExpr.h:136
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:2592
Header file for the Or class template.
Header file for the TDVecTSMatMultExprTrait class template.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the TSMatDVecMultExprTrait class template.
RT2::ElementType ET2
Element type of the right-hand side sparse matrix expression.
Definition: DMatTSMatMultExpr.h:135
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTSMatMultExpr.h:210
Header file for the DMatDVecMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTSMatMultExpr.h:132
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTSMatMultExpr.h:197
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTSMatMultExpr.h:134
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: DMatTSMatMultExpr.h:417
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:79
Removal of reference modifiers. The RemoveReference type trait removes any reference modifiers from the given...
Definition: RemoveReference.h:69
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:138
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTSMatMultExpr.h:200
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTSMatMultExpr.h:196
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
RightOperand rightOperand() const
Returns the right-hand side transpose sparse matrix operand.
Definition: DMatTSMatMultExpr.h:365
Header file for the reset shim.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: DMatTSMatMultExpr.h:213
Expression object for dense matrix-transpose sparse matrix multiplications. The DMatTSMatMultExpr clas...
Definition: DMatTSMatMultExpr.h:126
Constraints on the storage order of matrix types.
Header file for the RemoveReference type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTSMatMultExpr.h:389
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:122
Header file for the IsDenseVector type trait.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: DMatTSMatMultExpr.h:319
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTSMatMultExpr.h:409
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTSMatMultExpr.h:416
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:944
Header file for the IsComputation type trait class.
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
Header file for the IsTrue value trait.
Header file for the TSVecDMatMultExprTrait class template.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTSMatMultExpr.h:345
Header file for the IsUpper type trait.
Header file for exception macros.
MT2::ResultType RT2
Result type of the right-hand side sparse matrix expression.
Definition: DMatTSMatMultExpr.h:133
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
DMatTSMatMultExpr< MT1, MT2 > This
Type of this DMatTSMatMultExpr instance.
Definition: DMatTSMatMultExpr.h:195
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:79
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: DMatTSMatMultExpr.h:207
Header file for the IsExpression type trait class.
Header file for the TSMatSVecMultExprTrait class template.
Header file for the FunctionTrace class.