35 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_ 119 template<
typename MT1
125 class SMatDMatMultExpr
126 :
public MatMatMultExpr< DenseMatrix< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
127 ,
private Computation
152 SYM = ( SF && !( HF || LF || UF ) ),
153 HERM = ( HF && !( LF || UF ) ),
154 LOW = ( LF || ( ( SF || HF ) && UF ) ),
155 UPP = ( UF || ( ( SF || HF ) && LF ) )
165 template<
typename T1,
typename T2,
typename T3 >
166 struct IsEvaluationRequired {
167 enum :
bool { value = ( evaluateLeft || evaluateRight ) };
177 template<
typename T1,
typename T2,
typename T3 >
178 struct UseVectorizedKernel {
179 enum :
bool { value = useOptimizedKernels &&
181 T1::simdEnabled && T3::simdEnabled &&
198 template<
typename T1,
typename T2,
typename T3 >
199 struct UseOptimizedKernel {
200 enum :
bool { value = useOptimizedKernels &&
201 !UseVectorizedKernel<T1,T2,T3>::value &&
214 template<
typename T1,
typename T2,
typename T3 >
215 struct UseDefaultKernel {
216 enum :
bool { value = !UseVectorizedKernel<T1,T2,T3>::value &&
217 !UseOptimizedKernel<T1,T2,T3>::value };
// Compile-time flag: evaluates to true only when both evaluateLeft and
// evaluateRight are false and both operand types (MT1 and MT2) report
// smpAssignable themselves.
275 enum :
bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
276 !evaluateRight && MT2::smpAssignable };
331 :(
lhs_.columns() ) ) );
335 const size_t n(
end - begin );
354 if( i >=
lhs_.rows() ) {
357 if( j >=
rhs_.columns() ) {
369 inline size_t rows() const noexcept {
380 return rhs_.columns();
410 template<
typename T >
411 inline bool canAlias(
const T* alias )
const noexcept {
412 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
422 template<
typename T >
423 inline bool isAliased(
const T* alias )
const noexcept {
424 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
434 return rhs_.isAligned();
467 template<
typename MT
486 SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
505 template<
typename MT3
509 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
517 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
519 const size_t jtmp(
min( jj+block, B.columns() ) );
521 for(
size_t i=0UL; i<A.rows(); ++i )
526 for( ; element!=
end; ++element )
528 const size_t i1( element->index() );
532 C(i,i1) = element->value() * B(i1,i1);
542 ?( ( SYM || HERM || LOW )
545 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
552 for(
size_t j=jbegin; j<jend; ++j ) {
554 C(i,j) = element->value() * B(i1,j);
556 C(i,j) += element->value() * B(i1,j);
564 for(
size_t i=0UL; i<A.rows(); ++i ) {
565 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
566 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
588 template<
typename MT3
592 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
600 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
602 const size_t jtmp(
min( jj+block, B.columns() ) );
604 for(
size_t i=0UL; i<A.rows(); ++i )
609 const size_t nonzeros( A.nonZeros(i) );
610 const size_t kpos( nonzeros &
size_t(-4) );
613 for(
size_t k=0UL; k<kpos; k+=4UL )
615 const size_t i1( element->index() );
616 const ET1 v1( element->value() );
618 const size_t i2( element->index() );
619 const ET1 v2( element->value() );
621 const size_t i3( element->index() );
622 const ET1 v3( element->value() );
624 const size_t i4( element->index() );
625 const ET1 v4( element->value() );
634 :( UPP ?
max(i,jj) : jj ) );
636 ?( ( SYM || HERM || LOW )
639 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
646 const size_t jnum( jend - jbegin );
647 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
650 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
651 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
652 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
653 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
654 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
656 for(
size_t j=jpos; j<jend; ++j ) {
657 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
661 for( ; element!=
end; ++element )
663 const size_t i1( element->index() );
664 const ET1 v1( element->value() );
670 :( UPP ?
max(i,jj) : jj ) );
672 ?( ( SYM || HERM || LOW )
675 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
682 const size_t jnum( jend - jbegin );
683 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
686 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
687 C(i,j ) += v1 * B(i1,j );
688 C(i,j+1UL) += v1 * B(i1,j+1UL);
689 C(i,j+2UL) += v1 * B(i1,j+2UL);
690 C(i,j+3UL) += v1 * B(i1,j+3UL);
692 for(
size_t j=jpos; j<jend; ++j ) {
693 C(i,j) += v1 * B(i1,j);
700 for(
size_t i=0UL; i<A.rows(); ++i ) {
701 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
702 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
724 template<
typename MT3
728 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
736 for(
size_t i=0UL; i<A.rows(); ++i )
741 const size_t nonzeros( A.nonZeros(i) );
742 const size_t kpos( nonzeros &
size_t(-4) );
745 for(
size_t k=0UL; k<kpos; k+=4UL )
747 const size_t i1( element->index() );
748 const ET1 v1( element->value() );
750 const size_t i2( element->index() );
751 const ET1 v2( element->value() );
753 const size_t i3( element->index() );
754 const ET1 v3( element->value() );
756 const size_t i4( element->index() );
757 const ET1 v4( element->value() );
769 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
770 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
771 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
774 ?( SYM || HERM || LOW ?
min(i+1UL,i4) : i4 )
775 :( SYM || HERM || LOW ?
min(i,i4)+1UL : i4+1UL ) )
776 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
779 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
780 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
784 for( ; j<jpos; j+=SIMDSIZE ) {
785 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
787 for( ; remainder && j<jend; ++j ) {
788 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
792 for( ; element!=
end; ++element )
794 const size_t i1( element->index() );
795 const ET1 v1( element->value() );
801 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
802 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
803 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
806 ?( SYM || HERM || LOW ?
min(i+1UL,i1) : i1 )
807 :( SYM || HERM || LOW ?
min(i,i1)+1UL : i1+1UL ) )
808 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
811 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
812 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
816 for( ; j<jpos; j+=SIMDSIZE ) {
817 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
819 for( ; remainder && j<jend; ++j ) {
820 C(i,j) += v1 * B(i1,j);
826 for(
size_t i=0UL; i<A.rows(); ++i ) {
827 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
828 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
849 template<
typename MT
867 const ForwardFunctor fwd;
869 const TmpType tmp(
serial( rhs ) );
870 assign( ~lhs, fwd( tmp ) );
888 template<
typename MT
907 SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
926 template<
typename MT3
930 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
936 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
938 const size_t jtmp(
min( jj+block, B.columns() ) );
940 for(
size_t i=0UL; i<A.rows(); ++i )
945 for( ; element!=
end; ++element )
947 const size_t i1( element->index() );
951 C(i,i1) += element->value() * B(i1,i1);
964 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
971 const size_t jnum( jend - jbegin );
972 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
975 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
976 C(i,j ) += element->value() * B(i1,j );
977 C(i,j+1UL) += element->value() * B(i1,j+1UL);
978 C(i,j+2UL) += element->value() * B(i1,j+2UL);
979 C(i,j+3UL) += element->value() * B(i1,j+3UL);
981 for(
size_t j=jpos; j<jend; ++j ) {
982 C(i,j) += element->value() * B(i1,j);
1006 template<
typename MT3
1010 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1016 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1018 const size_t jtmp(
min( jj+block, B.columns() ) );
1020 for(
size_t i=0UL; i<A.rows(); ++i )
1025 const size_t nonzeros( A.nonZeros(i) );
1026 const size_t kpos( nonzeros &
size_t(-4) );
1029 for(
size_t k=0UL; k<kpos; k+=4UL )
1031 const size_t i1( element->index() );
1032 const ET1 v1( element->value() );
1034 const size_t i2( element->index() );
1035 const ET1 v2( element->value() );
1037 const size_t i3( element->index() );
1038 const ET1 v3( element->value() );
1040 const size_t i4( element->index() );
1041 const ET1 v4( element->value() );
1050 :( UPP ?
max(i,jj) : jj ) );
1055 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1062 const size_t jnum( jend - jbegin );
1063 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1066 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1067 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1068 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1069 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1070 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1072 for(
size_t j=jpos; j<jend; ++j ) {
1073 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1077 for( ; element!=
end; ++element )
1079 const size_t i1( element->index() );
1080 const ET1 v1( element->value() );
1086 :( UPP ?
max(i,jj) : jj ) );
1091 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1098 const size_t jnum( jend - jbegin );
1099 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1102 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1103 C(i,j ) += v1 * B(i1,j );
1104 C(i,j+1UL) += v1 * B(i1,j+1UL);
1105 C(i,j+2UL) += v1 * B(i1,j+2UL);
1106 C(i,j+3UL) += v1 * B(i1,j+3UL);
1108 for(
size_t j=jpos; j<jend; ++j ) {
1109 C(i,j) += v1 * B(i1,j);
1132 template<
typename MT3
1136 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1142 for(
size_t i=0UL; i<A.rows(); ++i )
1147 const size_t nonzeros( A.nonZeros(i) );
1148 const size_t kpos( nonzeros &
size_t(-4) );
1151 for(
size_t k=0UL; k<kpos; k+=4UL )
1153 const size_t i1( element->index() );
1154 const ET1 v1( element->value() );
1156 const size_t i2( element->index() );
1157 const ET1 v2( element->value() );
1159 const size_t i3( element->index() );
1160 const ET1 v3( element->value() );
1162 const size_t i4( element->index() );
1163 const ET1 v4( element->value() );
1175 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1176 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1177 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1180 ?( LOW ?
min(i+1UL,i4) : i4 )
1181 :( LOW ?
min(i,i4)+1UL : i4+1UL ) )
1182 :( LOW ? i+1UL : B.columns() ) );
1185 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1186 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1190 for( ; j<jpos; j+=SIMDSIZE ) {
1191 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1193 for( ; remainder && j<jend; ++j ) {
1194 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1198 for( ; element!=
end; ++element )
1200 const size_t i1( element->index() );
1201 const ET1 v1( element->value() );
1207 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1208 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1209 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1212 ?( LOW ?
min(i+1UL,i1) : i1 )
1213 :( LOW ?
min(i,i1)+1UL : i1+1UL ) )
1214 :( LOW ? i+1UL : B.columns() ) );
1217 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1218 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1222 for( ; j<jpos; j+=SIMDSIZE ) {
1223 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1225 for( ; remainder && j<jend; ++j ) {
1226 C(i,j) += v1 * B(i1,j);
1251 template<
typename MT
1270 SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1289 template<
typename MT3
1293 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1299 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1301 const size_t jtmp(
min( jj+block, B.columns() ) );
1303 for(
size_t i=0UL; i<A.rows(); ++i )
1308 for( ; element!=
end; ++element )
1310 const size_t i1( element->index() );
1314 C(i,i1) -= element->value() * B(i1,i1);
1327 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1334 const size_t jnum( jend - jbegin );
1335 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1338 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1339 C(i,j ) -= element->value() * B(i1,j );
1340 C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1341 C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1342 C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1344 for(
size_t j=jpos; j<jend; ++j ) {
1345 C(i,j) -= element->value() * B(i1,j);
1369 template<
typename MT3
1373 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1379 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1381 const size_t jtmp(
min( jj+block, B.columns() ) );
1383 for(
size_t i=0UL; i<A.rows(); ++i )
1388 const size_t nonzeros( A.nonZeros(i) );
1389 const size_t kpos( nonzeros &
size_t(-4) );
1392 for(
size_t k=0UL; k<kpos; k+=4UL )
1394 const size_t i1( element->index() );
1395 const ET1 v1( element->value() );
1397 const size_t i2( element->index() );
1398 const ET1 v2( element->value() );
1400 const size_t i3( element->index() );
1401 const ET1 v3( element->value() );
1403 const size_t i4( element->index() );
1404 const ET1 v4( element->value() );
1413 :( UPP ?
max(i,jj) : jj ) );
1418 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1425 const size_t jnum( jend - jbegin );
1426 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1429 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1430 C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1431 C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1432 C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1433 C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1435 for(
size_t j=jpos; j<jend; ++j ) {
1436 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1440 for( ; element!=
end; ++element )
1442 const size_t i1( element->index() );
1443 const ET1 v1( element->value() );
1449 :( UPP ?
max(i,jj) : jj ) );
1454 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1461 const size_t jnum( jend - jbegin );
1462 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1465 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1466 C(i,j ) -= v1 * B(i1,j );
1467 C(i,j+1UL) -= v1 * B(i1,j+1UL);
1468 C(i,j+2UL) -= v1 * B(i1,j+2UL);
1469 C(i,j+3UL) -= v1 * B(i1,j+3UL);
1471 for(
size_t j=jpos; j<jend; ++j ) {
1472 C(i,j) -= v1 * B(i1,j);
1495 template<
typename MT3
1499 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1505 for(
size_t i=0UL; i<A.rows(); ++i )
1510 const size_t nonzeros( A.nonZeros(i) );
1511 const size_t kpos( nonzeros &
size_t(-4) );
1514 for(
size_t k=0UL; k<kpos; k+=4UL )
1516 const size_t i1( element->index() );
1517 const ET1 v1( element->value() );
1519 const size_t i2( element->index() );
1520 const ET1 v2( element->value() );
1522 const size_t i3( element->index() );
1523 const ET1 v3( element->value() );
1525 const size_t i4( element->index() );
1526 const ET1 v4( element->value() );
1538 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1539 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1540 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1543 ?( LOW ?
min(i+1UL,i4) : i4 )
1544 :( LOW ?
min(i,i4)+1UL : i4+1UL ) )
1545 :( LOW ? i+1UL : B.columns() ) );
1548 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1549 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1553 for( ; j<jpos; j+=SIMDSIZE ) {
1554 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1556 for( ; remainder && j<jend; ++j ) {
1557 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1561 for( ; element!=
end; ++element )
1563 const size_t i1( element->index() );
1564 const ET1 v1( element->value() );
1570 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1571 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1572 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1575 ?( LOW ?
min(i+1UL,i1) : i1 )
1576 :( LOW ?
min(i,i1)+1UL : i1+1UL ) )
1577 :( LOW ? i+1UL : B.columns() ) );
1580 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1581 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1585 for( ; j<jpos; j+=SIMDSIZE ) {
1586 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1588 for( ; remainder && j<jend; ++j ) {
1589 C(i,j) -= v1 * B(i1,j);
1614 template<
typename MT
1628 schurAssign( ~lhs, tmp );
1660 template<
typename MT
1700 template<
typename MT
1719 const ForwardFunctor fwd;
1721 const TmpType tmp( rhs );
1743 template<
typename MT
1788 template<
typename MT
1830 template<
typename MT
1911 template<
typename MT1
1913 inline decltype(
auto)
1960 template<
typename MT1
1975 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2005 template<
typename MT1
2020 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2050 template<
typename MT1
2065 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2095 template<
typename MT1
2110 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2140 template<
typename MT1
2155 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2171 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2172 struct Rows< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2189 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2190 struct Columns< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2207 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2208 struct IsAligned< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2225 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2226 struct IsSymmetric< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2229 , IsBuiltin< ElementType_< SMatDMatMultExpr<MT1,MT2,false,true,false,false> > > >
2230 , And< Bool<LF>, Bool<UF> > >::value >
2246 template<
typename MT1,
typename MT2,
bool SF,
bool LF,
bool UF >
2247 struct IsHermitian< SMatDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
2264 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2265 struct IsLower< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2267 , And< IsLower<MT1>, IsLower<MT2> >
2268 , And< Or< Bool<SF>, Bool<HF> >
2269 , IsUpper<MT1>, IsUpper<MT2> > >::value >
2285 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2286 struct IsUniLower< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2287 :
public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
2288 , And< Or< Bool<SF>, Bool<HF> >
2289 , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
2305 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2307 :
public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2308 , And< IsStrictlyLower<MT2>, IsLower<MT1> >
2309 , And< Or< Bool<SF>, Bool<HF> >
2310 , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2311 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
2327 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2328 struct IsUpper< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2330 , And< IsUpper<MT1>, IsUpper<MT2> >
2331 , And< Or< Bool<SF>, Bool<HF> >
2332 , IsLower<MT1>, IsLower<MT2> > >::value >
2348 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2349 struct IsUniUpper< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2350 :
public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
2351 , And< Or< Bool<SF>, Bool<HF> >
2352 , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
2368 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2370 :
public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2371 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
2372 , And< Or< Bool<SF>, Bool<HF> >
2373 , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2374 , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:443
Header file for auxiliary alias declarations.
Header file for the generic min algorithm.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:72
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:996
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:136
Flag for Hermitian matrices.
Definition: SMatDMatMultExpr.h:153
Subvector< VT, AF > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:322
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:134
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Flag for upper matrices.
Definition: SMatDMatMultExpr.h:155
Generic wrapper for a compile time constant integral value. The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:198
Availability of a SIMD multiplication for the given data types. Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:290
ResultType_< MT1 > RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:131
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:560
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types. Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:399
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1027
Flag for symmetric matrices.
Definition: SMatDMatMultExpr.h:152
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template. The MultTrait_ alias declaration provide...
Definition: MultTrait.h:250
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for sparse matrix-dense matrix multiplications. The SMatDMatMultExpr class represent...
Definition: Forward.h:107
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
ElementType_< ResultType > ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:249
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:264
Compile time check for data types. This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions. The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1809
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:78
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:129
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template. The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Compile time check for the alignment of data types. This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
LeftOperand leftOperand() const noexcept
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:389
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions. The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:411
Compile time check for upper unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Header file for the generic max algorithm.
Flag for lower matrices.
Definition: SMatDMatMultExpr.h:154
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
ElementType_< RT1 > ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:133
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:247
Header file for the If class template.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:110
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:132
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding. This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:353
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:3087
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions. The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1027
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:264
Header file for the DeclDiag functor.
Constraint on the data type.
Header file for all forward declarations for expression class templates.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:305
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:108
Compile time check for lower unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
Header file for the conjugate shim.
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:246
Compile time check for resizable data types. This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
Header file for the IsSIMDCombinable type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:379
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:369
Utility type for generic codes.
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:450
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:252
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template. The If_ alias declaration provides a convenient...
Definition: If.h:154
Header file for the reset shim.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1029
Compile time check for Hermitian matrices. This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: SMatDMatMultExpr.h:250
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:423
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:819
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type. In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
Header file for the RemoveReference type trait.
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:263
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
CompositeType_< MT1 > CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:135
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3082
typename T::ConstIterator ConstIterator_
Alias declaration for nested ConstIterator type definitions. The ConstIterator_ alias declaration prov...
Definition: Aliases.h:103
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:433
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1029
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:248
Compile time logical or evaluation. The Or alias declaration performs at compile time a logical or ('||...
Definition: Or.h:76
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:261
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:258
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix. The Columns type trait evaluates the num...
Definition: Columns.h:75
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:255
Compile time evaluation of the number of rows of a matrix. The Rows type trait evaluates the number of...
Definition: Rows.h:75
Header file for the DeclHerm functor.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:251
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:600
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1321
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:742
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_INTERNAL_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
Header file for the IsExpression type trait class.
Header file for the function trace functionality.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:451