35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_ 118 template<
typename MT1
124 class TDMatTSMatMultExpr
125 :
public MatMatMultExpr< DenseMatrix< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, true > >
126 ,
private Computation
151 SYM = ( SF && !( HF || LF || UF ) ),
152 HERM = ( HF && !( LF || UF ) ),
153 LOW = ( LF || ( ( SF || HF ) && UF ) ),
154 UPP = ( UF || ( ( SF || HF ) && LF ) )
164 template<
typename T1,
typename T2,
typename T3 >
165 struct IsEvaluationRequired {
166 enum :
bool { value = ( evaluateLeft || evaluateRight ) };
176 template<
typename T1,
typename T2,
typename T3 >
177 struct UseVectorizedKernel {
178 enum :
bool { value = useOptimizedKernels &&
180 T1::simdEnabled && T2::simdEnabled &&
197 template<
typename T1,
typename T2,
typename T3 >
198 struct UseOptimizedKernel {
199 enum :
bool { value = useOptimizedKernels &&
200 !UseVectorizedKernel<T1,T2,T3>::value &&
213 template<
typename T1,
typename T2,
typename T3 >
214 struct UseDefaultKernel {
215 enum :
bool { value = !UseVectorizedKernel<T1,T2,T3>::value &&
216 !UseOptimizedKernel<T1,T2,T3>::value };
274 enum :
bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
275 !evaluateRight && MT2::smpAssignable };
330 :(
lhs_.columns() ) ) );
334 const size_t n(
end - begin );
354 if( i >=
lhs_.rows() ) {
357 if( j >=
rhs_.columns() ) {
369 inline size_t rows() const noexcept {
380 return rhs_.columns();
410 template<
typename T >
411 inline bool canAlias(
const T* alias )
const noexcept {
412 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
422 template<
typename T >
423 inline bool isAliased(
const T* alias )
const noexcept {
424 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
434 return lhs_.isAligned();
467 template<
typename MT
486 TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
505 template<
typename MT3
509 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
517 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
519 const size_t itmp(
min( ii+block, A.rows() ) );
521 for(
size_t j=0UL; j<B.columns(); ++j )
526 for( ; element!=
end; ++element )
528 const size_t j1( element->index() );
532 C(j1,j) = A(j1,j1) * element->value();
540 :( LOW ?
max(j,ii) : ii ) );
542 ?( ( SYM || HERM || UPP )
545 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
552 for(
size_t i=ibegin; i<iend; ++i ) {
554 C(i,j) = A(i,j1) * element->value();
556 C(i,j) += A(i,j1) * element->value();
564 for(
size_t j=0UL; j<B.columns(); ++j ) {
565 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
566 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
588 template<
typename MT3
592 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
600 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
602 const size_t itmp(
min( ii+block, A.rows() ) );
604 for(
size_t j=0UL; j<B.columns(); ++j )
609 const size_t nonzeros( B.nonZeros(j) );
610 const size_t kpos( nonzeros &
size_t(-4) );
613 for(
size_t k=0UL; k<kpos; k+=4UL )
615 const size_t j1( element->index() );
616 const ET2 v1( element->value() );
618 const size_t j2( element->index() );
619 const ET2 v2( element->value() );
621 const size_t j3( element->index() );
622 const ET2 v3( element->value() );
624 const size_t j4( element->index() );
625 const ET2 v4( element->value() );
634 :( LOW ?
max(j,ii) : ii ) );
636 ?( ( SYM || HERM || UPP )
639 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
646 const size_t inum( iend - ibegin );
647 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
650 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
651 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
652 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
653 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
654 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
656 for(
size_t i=ipos; i<iend; ++i ) {
657 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
661 for( ; element!=
end; ++element )
663 const size_t j1( element->index() );
664 const ET2 v1( element->value() );
670 :( LOW ?
max(j,ii) : ii ) );
672 ?( ( SYM || HERM || UPP )
675 :( SYM || HERM || UPP ?
min(j+1UL,itmp) : itmp ) );
682 const size_t inum( iend - ibegin );
683 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
686 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
687 C(i ,j) += A(i ,j1) * v1;
688 C(i+1UL,j) += A(i+1UL,j1) * v1;
689 C(i+2UL,j) += A(i+2UL,j1) * v1;
690 C(i+3UL,j) += A(i+3UL,j1) * v1;
692 for(
size_t i=ipos; i<iend; ++i ) {
693 C(i,j) += A(i,j1) * v1;
700 for(
size_t j=0UL; j<B.columns(); ++j ) {
701 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
702 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
724 template<
typename MT3
728 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
736 for(
size_t j=0UL; j<B.columns(); ++j )
741 const size_t nonzeros( B.nonZeros(j) );
742 const size_t kpos( nonzeros &
size_t(-4) );
745 for(
size_t k=0UL; k<kpos; k+=4UL )
747 const size_t j1( element->index() );
748 const ET2 v1( element->value() );
750 const size_t j2( element->index() );
751 const ET2 v2( element->value() );
753 const size_t j3( element->index() );
754 const ET2 v3( element->value() );
756 const size_t j4( element->index() );
757 const ET2 v4( element->value() );
769 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
770 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
771 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
774 ?( SYM || HERM || UPP ?
max(j+1UL,j4) : j4 )
775 :( SYM || HERM || UPP ?
max(j,j4)+1UL : j4+1UL ) )
776 :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
779 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
780 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
784 for( ; i<ipos; i+=SIMDSIZE ) {
785 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
787 for( ; remainder && i<iend; ++i ) {
788 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
792 for( ; element!=
end; ++element )
794 const size_t j1( element->index() );
795 const ET2 v1( element->value() );
801 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
802 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
803 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
806 ?( SYM || HERM || UPP ?
max(j+1UL,j1) : j1 )
807 :( SYM || HERM || UPP ?
max(j,j1)+1UL : j1+1UL ) )
808 :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
811 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
812 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
816 for( ; i<ipos; i+=SIMDSIZE ) {
817 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
819 for( ; remainder && i<iend; ++i ) {
820 C(i,j) += A(i,j1) * v1;
826 for(
size_t j=0UL; j<B.columns(); ++j ) {
827 for(
size_t i=j+1UL; i<A.rows(); ++i ) {
828 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
849 template<
typename MT
867 const ForwardFunctor fwd;
869 const TmpType tmp(
serial( rhs ) );
870 assign( ~lhs, fwd( tmp ) );
888 template<
typename MT
907 TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
926 template<
typename MT3
930 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
936 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
938 const size_t itmp(
min( ii+block, A.rows() ) );
940 for(
size_t j=0UL; j<B.columns(); ++j )
945 for( ; element!=
end; ++element )
947 const size_t j1( element->index() );
951 C(j1,j) += A(j1,j1) * element->value();
959 :( LOW ?
max(j,ii) : ii ) );
964 :( UPP ?
min(j+1UL,itmp) : itmp ) );
971 const size_t inum( iend - ibegin );
972 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
975 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
976 C(i ,j) += A(i ,j1) * element->value();
977 C(i+1UL,j) += A(i+1UL,j1) * element->value();
978 C(i+2UL,j) += A(i+2UL,j1) * element->value();
979 C(i+3UL,j) += A(i+3UL,j1) * element->value();
981 for(
size_t i=ipos; i<iend; ++i ) {
982 C(i,j) += A(i,j1) * element->value();
1006 template<
typename MT3
1010 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1016 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1018 const size_t itmp(
min( ii+block, A.rows() ) );
1020 for(
size_t j=0UL; j<B.columns(); ++j )
1025 const size_t nonzeros( B.nonZeros(j) );
1026 const size_t kpos( nonzeros &
size_t(-4) );
1029 for(
size_t k=0UL; k<kpos; k+=4UL )
1031 const size_t j1( element->index() );
1032 const ET2 v1( element->value() );
1034 const size_t j2( element->index() );
1035 const ET2 v2( element->value() );
1037 const size_t j3( element->index() );
1038 const ET2 v3( element->value() );
1040 const size_t j4( element->index() );
1041 const ET2 v4( element->value() );
1050 :( LOW ?
max(j,ii) : ii ) );
1055 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1062 const size_t inum( iend - ibegin );
1063 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1066 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1067 C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1068 C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1069 C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1070 C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1072 for(
size_t i=ipos; i<iend; ++i ) {
1073 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1077 for( ; element!=
end; ++element )
1079 const size_t j1( element->index() );
1080 const ET2 v1( element->value() );
1086 :( LOW ?
max(j,ii) : ii ) );
1091 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1098 const size_t inum( iend - ibegin );
1099 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1102 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1103 C(i ,j) += A(i ,j1) * v1;
1104 C(i+1UL,j) += A(i+1UL,j1) * v1;
1105 C(i+2UL,j) += A(i+2UL,j1) * v1;
1106 C(i+3UL,j) += A(i+3UL,j1) * v1;
1108 for(
size_t i=ipos; i<iend; ++i ) {
1109 C(i,j) += A(i,j1) * v1;
1132 template<
typename MT3
1136 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1142 for(
size_t j=0UL; j<B.columns(); ++j )
1147 const size_t nonzeros( B.nonZeros(j) );
1148 const size_t kpos( nonzeros &
size_t(-4) );
1151 for(
size_t k=0UL; k<kpos; k+=4UL )
1153 const size_t j1( element->index() );
1154 const ET2 v1( element->value() );
1156 const size_t j2( element->index() );
1157 const ET2 v2( element->value() );
1159 const size_t j3( element->index() );
1160 const ET2 v3( element->value() );
1162 const size_t j4( element->index() );
1163 const ET2 v4( element->value() );
1175 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1176 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1177 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1180 ?( UPP ?
max(j+1UL,j4) : j4 )
1181 :( UPP ?
max(j,j4)+1UL : j4+1UL ) )
1182 :( UPP ? j+1UL : A.rows() ) );
1185 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1186 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1190 for( ; i<ipos; i+=SIMDSIZE ) {
1191 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1193 for( ; remainder && i<iend; ++i ) {
1194 C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1198 for( ; element!=
end; ++element )
1200 const size_t j1( element->index() );
1201 const ET2 v1( element->value() );
1207 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1208 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1209 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1212 ?( UPP ?
max(j+1UL,j1) : j1 )
1213 :( UPP ?
max(j,j1)+1UL : j1+1UL ) )
1214 :( UPP ? j+1UL : A.rows() ) );
1217 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1218 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1222 for( ; i<ipos; i+=SIMDSIZE ) {
1223 C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
1225 for( ; remainder && i<iend; ++i ) {
1226 C(i,j) += A(i,j1) * v1;
1251 template<
typename MT
1270 TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1289 template<
typename MT3
1293 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1299 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1301 const size_t itmp(
min( ii+block, A.rows() ) );
1303 for(
size_t j=0UL; j<B.columns(); ++j )
1308 for( ; element!=
end; ++element )
1310 const size_t j1( element->index() );
1314 C(j1,j) -= A(j1,j1) * element->value();
1322 :( LOW ?
max(j,ii) : ii ) );
1327 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1334 const size_t inum( iend - ibegin );
1335 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1338 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1339 C(i ,j) -= A(i ,j1) * element->value();
1340 C(i+1UL,j) -= A(i+1UL,j1) * element->value();
1341 C(i+2UL,j) -= A(i+2UL,j1) * element->value();
1342 C(i+3UL,j) -= A(i+3UL,j1) * element->value();
1344 for(
size_t i=ipos; i<iend; ++i ) {
1345 C(i,j) -= A(i,j1) * element->value();
1369 template<
typename MT3
1373 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1379 for(
size_t ii=0UL; ii<A.rows(); ii+=block )
1381 const size_t itmp(
min( ii+block, A.rows() ) );
1383 for(
size_t j=0UL; j<B.columns(); ++j )
1388 const size_t nonzeros( B.nonZeros(j) );
1389 const size_t kpos( nonzeros &
size_t(-4) );
1392 for(
size_t k=0UL; k<kpos; k+=4UL )
1394 const size_t j1( element->index() );
1395 const ET2 v1( element->value() );
1397 const size_t j2( element->index() );
1398 const ET2 v2( element->value() );
1400 const size_t j3( element->index() );
1401 const ET2 v3( element->value() );
1403 const size_t j4( element->index() );
1404 const ET2 v4( element->value() );
1413 :( LOW ?
max(j,ii) : ii ) );
1418 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1425 const size_t inum( iend - ibegin );
1426 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1429 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1430 C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1431 C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1432 C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1433 C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1435 for(
size_t i=ipos; i<iend; ++i ) {
1436 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1440 for( ; element!=
end; ++element )
1442 const size_t j1( element->index() );
1443 const ET2 v1( element->value() );
1449 :( LOW ?
max(j,ii) : ii ) );
1454 :( UPP ?
min(j+1UL,itmp) : itmp ) );
1461 const size_t inum( iend - ibegin );
1462 const size_t ipos( ibegin + ( inum &
size_t(-4) ) );
1465 for(
size_t i=ibegin; i<ipos; i+=4UL ) {
1466 C(i ,j) -= A(i ,j1) * v1;
1467 C(i+1UL,j) -= A(i+1UL,j1) * v1;
1468 C(i+2UL,j) -= A(i+2UL,j1) * v1;
1469 C(i+3UL,j) -= A(i+3UL,j1) * v1;
1471 for(
size_t i=ipos; i<iend; ++i ) {
1472 C(i,j) -= A(i,j1) * v1;
1495 template<
typename MT3
1499 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1505 for(
size_t j=0UL; j<B.columns(); ++j )
1510 const size_t nonzeros( B.nonZeros(j) );
1511 const size_t kpos( nonzeros &
size_t(-4) );
1514 for(
size_t k=0UL; k<kpos; k+=4UL )
1516 const size_t j1( element->index() );
1517 const ET2 v1( element->value() );
1519 const size_t j2( element->index() );
1520 const ET2 v2( element->value() );
1522 const size_t j3( element->index() );
1523 const ET2 v3( element->value() );
1525 const size_t j4( element->index() );
1526 const ET2 v4( element->value() );
1538 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1539 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1540 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1543 ?( UPP ?
max(j+1UL,j4) : j4 )
1544 :( UPP ?
max(j,j4)+1UL : j4+1UL ) )
1545 :( UPP ? j+1UL : A.rows() ) );
1548 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1549 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1553 for( ; i<ipos; i+=SIMDSIZE ) {
1554 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1556 for( ; remainder && i<iend; ++i ) {
1557 C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1561 for( ; element!=
end; ++element )
1563 const size_t j1( element->index() );
1564 const ET2 v1( element->value() );
1570 ?( ( LOW ?
max(j,j1+1UL) : j1+1UL ) &
size_t(-SIMDSIZE) )
1571 :( ( LOW ?
max(j,j1) : j1 ) &
size_t(-SIMDSIZE) ) )
1572 :( LOW ? ( j &
size_t(-SIMDSIZE) ) : 0UL ) );
1575 ?( UPP ?
max(j+1UL,j1) : j1 )
1576 :( UPP ?
max(j,j1)+1UL : j1+1UL ) )
1577 :( UPP ? j+1UL : A.rows() ) );
1580 const size_t ipos( remainder ? ( iend &
size_t(-SIMDSIZE) ) : iend );
1581 BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos,
"Invalid end calculation" );
1585 for( ; i<ipos; i+=SIMDSIZE ) {
1586 C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 );
1588 for( ; remainder && i<iend; ++i ) {
1589 C(i,j) -= A(i,j1) * v1;
1614 template<
typename MT
1628 schurAssign( ~lhs, tmp );
1660 template<
typename MT
1700 template<
typename MT
1719 const ForwardFunctor fwd;
1721 const TmpType tmp( rhs );
1742 template<
typename MT
1786 template<
typename MT
1828 template<
typename MT
1909 template<
typename MT1
1911 inline decltype(
auto)
1958 template<
typename MT1
1973 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2003 template<
typename MT1
2018 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2048 template<
typename MT1
2063 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2093 template<
typename MT1
2108 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2138 template<
typename MT1
2153 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2169 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2170 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2171 :
public Size<MT1,0UL>
2174 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2175 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2176 :
public Size<MT2,1UL>
2192 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2193 struct IsAligned< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2210 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2211 struct IsSymmetric< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2212 :
public Or< Bool<SF>
2214 , IsBuiltin< ElementType_< TDMatTSMatMultExpr<MT1,MT2,false,true,false,false> > > >
2215 , And< Bool<LF>, Bool<UF> > >
2231 template<
typename MT1,
typename MT2,
bool SF,
bool LF,
bool UF >
2232 struct IsHermitian< TDMatTSMatMultExpr<MT1,MT2,SF,true,LF,UF> >
2249 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2250 struct IsLower< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2251 :
public Or< Bool<LF>
2252 , And< IsLower<MT1>, IsLower<MT2> >
2253 , And< Or< Bool<SF>, Bool<HF> >
2254 , IsUpper<MT1>, IsUpper<MT2> > >
2270 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2271 struct IsUniLower< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2272 :
public Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
2273 , And< Or< Bool<SF>, Bool<HF> >
2274 , IsUniUpper<MT1>, IsUniUpper<MT2> > >
2290 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2292 :
public Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2293 , And< IsStrictlyLower<MT2>, IsLower<MT1> >
2294 , And< Or< Bool<SF>, Bool<HF> >
2295 , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2296 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >
2312 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2313 struct IsUpper< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2314 :
public Or< Bool<UF>
2315 , And< IsUpper<MT1>, IsUpper<MT2> >
2316 , And< Or< Bool<SF>, Bool<HF> >
2317 , IsLower<MT1>, IsLower<MT2> > >
2333 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2334 struct IsUniUpper< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2335 :
public Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
2336 , And< Or< Bool<SF>, Bool<HF> >
2337 , IsUniLower<MT1>, IsUniLower<MT2> > >
2353 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2355 :
public Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2356 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
2357 , And< Or< Bool<SF>, Bool<HF> >
2358 , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2359 , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:329
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:131
Headerfile for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:71
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:451
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:399
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:996
Flag for symmetric matrices.
Definition: TDMatTSMatMultExpr.h:151
Header file for the IsUniUpper type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:86
Header file for basic type definitions.
Flag for lower matrices.
Definition: TDMatTSMatMultExpr.h:153
Flag for Hermitian matrices.
Definition: TDMatTSMatMultExpr.h:152
Expression object for transpose dense matrix-transpose sparse matrix multiplications.The TDMatTSMatMultExpr class represents the compile time expression for multiplications between a column-major dense matrix and a column-major sparse matrix.
Definition: Forward.h:155
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTSMatMultExpr.h:389
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTSMatMultExpr.h:260
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Generic wrapper for a compile time constant integral value.The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
CompositeType_< MT2 > CT2
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:135
Header file for the DeclUpp functor.
ElementType_< RT2 > ET2
Element type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:133
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:364
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTSMatMultExpr.h:423
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:588
constexpr Unchecked unchecked
Global Unchecked instance.The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:87
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatTSMatMultExpr.h:411
Flag for upper matrices.
Definition: TDMatTSMatMultExpr.h:154
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1026
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:291
Header file for the Computation base class.
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:257
Header file for the MatMatMultExpr base class.
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:87
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1950
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatTSMatMultExpr.h:249
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:245
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:80
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:129
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
ResultType_< MT2 > RT2
Result type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:131
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:71
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for upper unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Headerfile for the generic max algorithm.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: TDMatTSMatMultExpr.h:379
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: TDMatTSMatMultExpr.h:369
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
TDMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the TDMatTSMatMultExpr class.
Definition: TDMatTSMatMultExpr.h:289
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:250
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:3085
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1026
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:89
Generic wrapper for the null function.
Definition: Noop.h:59
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:430
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:263
Header file for the DeclDiag functor.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:304
Constraint on the data type.
Header file for all forward declarations for expression class templates.
ElementType_< ResultType > ElementType
Resulting element type.
Definition: TDMatTSMatMultExpr.h:248
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:130
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:107
Compile time check for lower unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
Header file for the conjugate shim.
Compile time check for resizable data types. This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
Header file for the IsSIMDCombinable type trait.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:110
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:247
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:132
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:134
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template. The If_ alias declaration provides a convenient...
Definition: If.h:154
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
Header file for the reset shim.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1028
Header file for the isDefault shim.
Compile time check for Hermitian matrices. This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:816
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type. In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
Header file for the RemoveReference type trait.
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:263
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3080
typename T::ConstIterator ConstIterator_
Alias declaration for nested ConstIterator type definitions. The ConstIterator_ alias declaration prov...
Definition: Aliases.h:103
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:353
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1028
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Compile time logical 'or' evaluation. The Or alias declaration performs at compile time a logical 'or'...
Definition: Or.h:76
Compile time evaluation of the size of vectors and matrices. The Size type trait evaluates the size of...
Definition: Size.h:80
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:450
Header file for the DeclHerm functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTSMatMultExpr.h:443
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:628
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:254
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1321
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTSMatMultExpr.h:251
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:908
Header file for the IsResizable type trait.
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
Header file for the IsExpression type trait class.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTSMatMultExpr.h:433
Header file for the function trace functionality.
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:246