35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_ 120 template<
typename MT1
126 class TDMatTSMatMultExpr
127 :
public MatMatMultExpr< DenseMatrix< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, true > >
128 ,
private Computation
153 SYM = ( SF && !( HF || LF || UF ) ),
154 HERM = ( HF && !( LF || UF ) ),
155 LOW = ( LF || ( ( SF || HF ) && UF ) ),
156 UPP = ( UF || ( ( SF || HF ) && LF ) )
166 template<
typename T1,
typename T2,
typename T3 >
167 struct IsEvaluationRequired {
168 enum :
bool { value = ( evaluateLeft || evaluateRight ) };
178 template<
typename T1,
typename T2,
typename T3 >
179 struct UseVectorizedKernel {
180 enum :
bool { value = useOptimizedKernels &&
182 T1::simdEnabled && T2::simdEnabled &&
199 template<
typename T1,
typename T2,
typename T3 >
200 struct UseOptimizedKernel {
201 enum :
bool { value = useOptimizedKernels &&
202 !UseVectorizedKernel<T1,T2,T3>::value &&
215 template<
typename T1,
typename T2,
typename T3 >
216 struct UseDefaultKernel {
217 enum :
bool { value = !UseVectorizedKernel<T1,T2,T3>::value &&
218 !UseOptimizedKernel<T1,T2,T3>::value };
276 enum :
bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
277 !evaluateRight && MT2::smpAssignable };
332 :(
lhs_.columns() ) ) );
336 const size_t n(
end - begin );
355 if( i >=
lhs_.rows() ) {
358 if( j >=
rhs_.columns() ) {
370 inline size_t rows() const noexcept {
381 return rhs_.columns();
411 template<
typename T >
412 inline bool canAlias(
const T* alias )
const noexcept {
413 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
423 template<
typename T >
424 inline bool isAliased(
const T* alias )
const noexcept {
425 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
435 return lhs_.isAligned();
468 template<
typename MT
487 TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
506 template<
typename MT3
510 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
518 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
520 const size_t itmp(
min( ii+block, A.rows() ) );
522 for(
size_t j=0UL; j<B.columns(); ++j )
527 for( ; element!=
end; ++element )
529 const size_t j1( element->index() );
533 C(j1,j) = A(j1,j1) * element->value();
541 :( LOW ?
max(j,ii) : ii ) );
543 ?( ( SYM || HERM || UPP )
546 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
553 for(
size_t i=ibegin; i<iend; ++i ) {
555 C(i,j) = A(i,j1) * element->value();
557 C(i,j) += A(i,j1) * element->value();
565 for(
size_t j=0UL; j<B.columns(); ++j ) {
566 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
567 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
589 template<
typename MT3
593 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
601 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
603 const size_t itmp(
min( ii+block, A.rows() ) );
605 for(
size_t j=0UL; j<B.columns(); ++j )
610 const size_t nonzeros( B.nonZeros(j) );
611 const size_t kpos( nonzeros &
size_t(-4) );
614 for(
size_t k=0UL; k<kpos; k+=4UL )
616 const size_t j1( element->index() );
617 const ET2 v1( element->value() );
619 const size_t j2( element->index() );
620 const ET2 v2( element->value() );
622 const size_t j3( element->index() );
623 const ET2 v3( element->value() );
625 const size_t j4( element->index() );
626 const ET2 v4( element->value() );
635 :( LOW ?
max(j,ii) : ii ) );
637 ?( ( SYM || HERM || UPP )
640 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
647 const size_t inum( iend - ibegin );
648 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
651 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
652 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
653 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
654 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
655 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
657 for(
size_t i=ipos; i<iend; ++i ) {
658 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
662 for( ; element!=
end; ++element )
664 const size_t j1( element->index() );
665 const ET2 v1( element->value() );
671 :( LOW ?
max(j,ii) : ii ) );
673 ?( ( SYM || HERM || UPP )
676 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
683 const size_t inum( iend - ibegin );
684 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
687 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
688 C(i ,j) += A(i ,j1) * v1;
689 C(i+1UL,j) += A(i+1UL,j1) * v1;
690 C(i+2UL,j) += A(i+2UL,j1) * v1;
691 C(i+3UL,j) += A(i+3UL,j1) * v1;
693 for(
size_t i=ipos; i<iend; ++i ) {
694 C(i,j) += A(i,j1) * v1;
701 for(
size_t j=0UL; j<B.columns(); ++j ) {
702 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
703 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
725 template<
typename MT3
729 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
737 for(
size_t j=0UL; j<B.columns(); ++j )
742 const size_t nonzeros( B.nonZeros(j) );
743 const size_t kpos( nonzeros &
size_t(-4) );
746 for(
size_t k=0UL; k<kpos; k+=4UL )
748 const size_t j1( element->index() );
749 const ET2 v1( element->value() );
751 const size_t j2( element->index() );
752 const ET2 v2( element->value() );
754 const size_t j3( element->index() );
755 const ET2 v3( element->value() );
757 const size_t j4( element->index() );
758 const ET2 v4( element->value() );
770 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
771 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
772 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
775 ?( SYM || HERM || UPP ?
max(j+1UL,j4) : j4 )
776 :( SYM || HERM || UPP ?
max(j,j4)+1UL : j4+1UL ) )
777 :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
780 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
781 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
785 for( ; i<ipos; i+=SIMDSIZE ) {
786 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
788 for( ; remainder && i<iend; ++i ) {
789 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
793 for( ; element!=
end; ++element )
795 const size_t j1( element->index() );
796 const ET2 v1( element->value() );
802 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
803 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
804 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
807 ?( SYM || HERM || UPP ?
max(j+1UL,j1) : j1 )
808 :( SYM || HERM || UPP ?
max(j,j1)+1UL : j1+1UL ) )
809 :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
812 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
813 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
817 for( ; i<ipos; i+=SIMDSIZE ) {
818 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
820 for( ; remainder && i<iend; ++i ) {
821 C(i,j) += A(i,j1) * v1;
827 for(
size_t j=0UL; j<B.columns(); ++j ) {
828 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
829 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
850 template<
typename MT
868 const ForwardFunctor fwd;
870 const TmpType tmp(
serial( rhs ) );
871 assign( ~lhs, fwd( tmp ) );
889 template<
typename MT
908 TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
927 template<
typename MT3
931 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
937 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
939 const size_t itmp(
min( ii+block, A.rows() ) );
941 for(
size_t j=0UL; j<B.columns(); ++j )
946 for( ; element!=
end; ++element )
948 const size_t j1( element->index() );
952 C(j1,j) += A(j1,j1) * element->value();
960 :( LOW ?
max(j,ii) : ii ) );
965 :( UPP ?
min(j+1UL,itmp) : itmp ) );
972 const size_t inum( iend - ibegin );
973 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
976 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
977 C(i ,j) += A(i ,j1) * element->value();
978 C(i+1UL,j) += A(i+1UL,j1) * element->value();
979 C(i+2UL,j) += A(i+2UL,j1) * element->value();
980 C(i+3UL,j) += A(i+3UL,j1) * element->value();
982 for(
size_t i=ipos; i<iend; ++i ) {
983 C(i,j) += A(i,j1) * element->value();
1007 template<
typename MT3
1011 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1017 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1019 const size_t itmp(
min( ii+block, A.rows() ) );
1021 for(
size_t j=0UL; j<B.columns(); ++j )
1026 const size_t nonzeros( B.nonZeros(j) );
1027 const size_t kpos( nonzeros &
size_t(-4) );
1030 for(
size_t k=0UL; k<kpos; k+=4UL )
1032 const size_t j1( element->index() );
1033 const ET2 v1( element->value() );
1035 const size_t j2( element->index() );
1036 const ET2 v2( element->value() );
1038 const size_t j3( element->index() );
1039 const ET2 v3( element->value() );
1041 const size_t j4( element->index() );
1042 const ET2 v4( element->value() );
1051 :( LOW ?
max(j,ii) : ii ) );
1056 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1063 const size_t inum( iend - ibegin );
1064 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1067 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1068 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1069 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1070 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1071 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1073 for(
size_t i=ipos; i<iend; ++i ) {
1074 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1078 for( ; element!=
end; ++element )
1080 const size_t j1( element->index() );
1081 const ET2 v1( element->value() );
1087 :( LOW ?
max(j,ii) : ii ) );
1092 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1099 const size_t inum( iend - ibegin );
1100 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1103 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1104 C(i ,j) += A(i ,j1) * v1;
1105 C(i+1UL,j) += A(i+1UL,j1) * v1;
1106 C(i+2UL,j) += A(i+2UL,j1) * v1;
1107 C(i+3UL,j) += A(i+3UL,j1) * v1;
1109 for(
size_t i=ipos; i<iend; ++i ) {
1110 C(i,j) += A(i,j1) * v1;
1133 template<
typename MT3
1137 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1143 for(
size_t j=0UL; j<B.columns(); ++j )
1148 const size_t nonzeros( B.nonZeros(j) );
1149 const size_t kpos( nonzeros &
size_t(-4) );
1152 for(
size_t k=0UL; k<kpos; k+=4UL )
1154 const size_t j1( element->index() );
1155 const ET2 v1( element->value() );
1157 const size_t j2( element->index() );
1158 const ET2 v2( element->value() );
1160 const size_t j3( element->index() );
1161 const ET2 v3( element->value() );
1163 const size_t j4( element->index() );
1164 const ET2 v4( element->value() );
1176 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1177 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1178 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1181 ?( UPP ?
max(j+1UL,j4) : j4 )
1182 :( UPP ?
max(j,j4)+1UL : j4+1UL ) )
1183 :( UPP ? j+1UL : A.rows() ) );
1186 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1187 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1191 for( ; i<ipos; i+=SIMDSIZE ) {
1192 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1194 for( ; remainder && i<iend; ++i ) {
1195 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1199 for( ; element!=
end; ++element )
1201 const size_t j1( element->index() );
1202 const ET2 v1( element->value() );
1208 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1209 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1210 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1213 ?( UPP ?
max(j+1UL,j1) : j1 )
1214 :( UPP ?
max(j,j1)+1UL : j1+1UL ) )
1215 :( UPP ? j+1UL : A.rows() ) );
1218 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1219 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1223 for( ; i<ipos; i+=SIMDSIZE ) {
1224 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
1226 for( ; remainder && i<iend; ++i ) {
1227 C(i,j) += A(i,j1) * v1;
1252 template<
typename MT
1271 TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1290 template<
typename MT3
1294 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1300 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1302 const size_t itmp(
min( ii+block, A.rows() ) );
1304 for(
size_t j=0UL; j<B.columns(); ++j )
1309 for( ; element!=
end; ++element )
1311 const size_t j1( element->index() );
1315 C(j1,j) -= A(j1,j1) * element->value();
1323 :( LOW ?
max(j,ii) : ii ) );
1328 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1335 const size_t inum( iend - ibegin );
1336 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1339 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1340 C(i ,j) -= A(i ,j1) * element->value();
1341 C(i+1UL,j) -= A(i+1UL,j1) * element->value();
1342 C(i+2UL,j) -= A(i+2UL,j1) * element->value();
1343 C(i+3UL,j) -= A(i+3UL,j1) * element->value();
1345 for(
size_t i=ipos; i<iend; ++i ) {
1346 C(i,j) -= A(i,j1) * element->value();
1370 template<
typename MT3
1374 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1380 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1382 const size_t itmp(
min( ii+block, A.rows() ) );
1384 for(
size_t j=0UL; j<B.columns(); ++j )
1389 const size_t nonzeros( B.nonZeros(j) );
1390 const size_t kpos( nonzeros &
size_t(-4) );
1393 for(
size_t k=0UL; k<kpos; k+=4UL )
1395 const size_t j1( element->index() );
1396 const ET2 v1( element->value() );
1398 const size_t j2( element->index() );
1399 const ET2 v2( element->value() );
1401 const size_t j3( element->index() );
1402 const ET2 v3( element->value() );
1404 const size_t j4( element->index() );
1405 const ET2 v4( element->value() );
1414 :( LOW ?
max(j,ii) : ii ) );
1419 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1426 const size_t inum( iend - ibegin );
1427 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1430 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1431 C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1432 C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1433 C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1434 C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1436 for(
size_t i=ipos; i<iend; ++i ) {
1437 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1441 for( ; element!=
end; ++element )
1443 const size_t j1( element->index() );
1444 const ET2 v1( element->value() );
1450 :( LOW ?
max(j,ii) : ii ) );
1455 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1462 const size_t inum( iend - ibegin );
1463 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1466 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1467 C(i ,j) -= A(i ,j1) * v1;
1468 C(i+1UL,j) -= A(i+1UL,j1) * v1;
1469 C(i+2UL,j) -= A(i+2UL,j1) * v1;
1470 C(i+3UL,j) -= A(i+3UL,j1) * v1;
1472 for(
size_t i=ipos; i<iend; ++i ) {
1473 C(i,j) -= A(i,j1) * v1;
1496 template<
typename MT3
1500 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1506 for(
size_t j=0UL; j<B.columns(); ++j )
1511 const size_t nonzeros( B.nonZeros(j) );
1512 const size_t kpos( nonzeros &
size_t(-4) );
1515 for(
size_t k=0UL; k<kpos; k+=4UL )
1517 const size_t j1( element->index() );
1518 const ET2 v1( element->value() );
1520 const size_t j2( element->index() );
1521 const ET2 v2( element->value() );
1523 const size_t j3( element->index() );
1524 const ET2 v3( element->value() );
1526 const size_t j4( element->index() );
1527 const ET2 v4( element->value() );
1539 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1540 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1541 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1544 ?( UPP ?
max(j+1UL,j4) : j4 )
1545 :( UPP ?
max(j,j4)+1UL : j4+1UL ) )
1546 :( UPP ? j+1UL : A.rows() ) );
1549 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1550 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1554 for( ; i<ipos; i+=SIMDSIZE ) {
1555 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1557 for( ; remainder && i<iend; ++i ) {
1558 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1562 for( ; element!=
end; ++element )
1564 const size_t j1( element->index() );
1565 const ET2 v1( element->value() );
1571 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1572 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1573 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1576 ?( UPP ?
max(j+1UL,j1) : j1 )
1577 :( UPP ?
max(j,j1)+1UL : j1+1UL ) )
1578 :( UPP ? j+1UL : A.rows() ) );
1581 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1582 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1586 for( ; i<ipos; i+=SIMDSIZE ) {
1587 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 );
1589 for( ; remainder && i<iend; ++i ) {
1590 C(i,j) -= A(i,j1) * v1;
1615 template<
typename MT
1629 schurAssign( ~lhs, tmp );
1661 template<
typename MT
1701 template<
typename MT
1720 const ForwardFunctor fwd;
1722 const TmpType tmp( rhs );
1743 template<
typename MT
1787 template<
typename MT
1829 template<
typename MT
1910 template<
typename MT1
1912 inline decltype(
auto)
1959 template<
typename MT1
1974 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2004 template<
typename MT1
2019 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2049 template<
typename MT1
2064 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2094 template<
typename MT1
2109 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2139 template<
typename MT1
2154 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2170 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2171 struct Rows< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2188 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2189 struct Columns< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2206 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2207 struct IsAligned< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2224 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2225 struct IsSymmetric< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2228 , IsBuiltin< ElementType_< TDMatTSMatMultExpr<MT1,MT2,false,true,false,false> > > >
2229 , And< Bool<LF>, Bool<UF> > >::value >
2245 template<
typename MT1,
typename MT2,
bool SF,
bool LF,
bool UF >
2246 struct IsHermitian< TDMatTSMatMultExpr<MT1,MT2,SF,true,LF,UF> >
2263 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2264 struct IsLower< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2266 , And< IsLower<MT1>, IsLower<MT2> >
2267 , And< Or< Bool<SF>, Bool<HF> >
2268 , IsUpper<MT1>, IsUpper<MT2> > >::value >
2284 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2285 struct IsUniLower< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2286 :
public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
2287 , And< Or< Bool<SF>, Bool<HF> >
2288 , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
2304 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2306 :
public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2307 , And< IsStrictlyLower<MT2>, IsLower<MT1> >
2308 , And< Or< Bool<SF>, Bool<HF> >
2309 , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2310 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
2326 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2327 struct IsUpper< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2329 , And< IsUpper<MT1>, IsUpper<MT2> >
2330 , And< Or< Bool<SF>, Bool<HF> >
2331 , IsLower<MT1>, IsLower<MT2> > >::value >
2347 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2348 struct IsUniUpper< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2349 :
public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
2350 , And< Or< Bool<SF>, Bool<HF> >
2351 , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
2367 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2369 :
public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2370 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
2371 , And< Or< Bool<SF>, Bool<HF> >
2372 , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2373 , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:72
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:452
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:400
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:996
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
Subvector< VT, AF > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:322
Expression object for transpose dense matrix-transpose sparse matrix multiplications. The TDMatTSMatMultExpr class represents the compile time expression for multiplications between a column-major dense matrix and a column-major sparse matrix.
Definition: Forward.h:155
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTSMatMultExpr.h:390
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTSMatMultExpr.h:262
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Generic wrapper for a compile time constant integral value. The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
CompositeType_< MT2 > CT2
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:137
Header file for the DeclUpp functor.
ElementType_< RT2 > ET2
Element type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:135
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:198
Availability of a SIMD multiplication for the given data types. Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTSMatMultExpr.h:424
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:560
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types. Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatTSMatMultExpr.h:412
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1027
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template. The MultTrait_ alias declaration provide...
Definition: MultTrait.h:250
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the Computation base class.
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:259
Header file for the MatMatMultExpr base class.
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types. This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions. The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1809
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatTSMatMultExpr.h:251
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:247
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:78
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:129
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template. The IfTrue_ alias declaration provides a co...
Definition: If.h:109
ResultType_< MT2 > RT2
Result type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:133
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Compile time check for the alignment of data types. This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions. The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for upper unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Header file for the generic max algorithm.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: TDMatTSMatMultExpr.h:380
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: TDMatTSMatMultExpr.h:370
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
TDMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the TDMatTSMatMultExpr class.
Definition: TDMatTSMatMultExpr.h:291
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding. This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:252
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:3087
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions. The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1027
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Flag for upper matrices.
Definition: TDMatTSMatMultExpr.h:156
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:264
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:265
Header file for the DeclDiag functor.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:306
Constraint on the data type.
Header file for all forward declarations for expression class templates.
ElementType_< ResultType > ElementType
Resulting element type.
Definition: TDMatTSMatMultExpr.h:250
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:132
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:108
Compile time check for lower unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
Header file for the conjugate shim.
Compile time check for resizable data types. This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
Header file for the IsSIMDCombinable type trait.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:110
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:249
Flag for symmetric matrices.
Definition: TDMatTSMatMultExpr.h:153
Utility type for generic codes.
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:134
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:136
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template. The If_ alias declaration provides a convenient...
Definition: If.h:154
Header file for the reset shim.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1029
Header file for the isDefault shim.
Compile time check for Hermitian matrices. This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:819
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type. In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
Header file for the RemoveReference type trait.
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:263
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3082
typename T::ConstIterator ConstIterator_
Alias declaration for nested ConstIterator type definitions. The ConstIterator_ alias declaration prov...
Definition: Aliases.h:103
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:354
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1029
Flag for Hermitian matrices.
Definition: TDMatTSMatMultExpr.h:154
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Compile time logical or evaluation. The Or alias declaration performs at compile time a logical or ('||...
Definition: Or.h:76
Flag for lower matrices.
Definition: TDMatTSMatMultExpr.h:155
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix. The Columns type trait evaluates the num...
Definition: Columns.h:75
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:451
Compile time evaluation of the number of rows of a matrix. The Rows type trait evaluates the number of...
Definition: Rows.h:75
Header file for the DeclHerm functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTSMatMultExpr.h:444
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:600
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:256
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1321
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTSMatMultExpr.h:253
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:742
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
Header file for the IsExpression type trait class.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTSMatMultExpr.h:434
Header file for the function trace functionality.
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:248