35 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_ 117 template<
typename MT1
123 class SMatDMatMultExpr
124 :
public MatMatMultExpr< DenseMatrix< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
125 ,
private Computation
150 SYM = ( SF && !( HF || LF || UF ) ),
151 HERM = ( HF && !( LF || UF ) ),
152 LOW = ( LF || ( ( SF || HF ) && UF ) ),
153 UPP = ( UF || ( ( SF || HF ) && LF ) )
163 template<
typename T1,
typename T2,
typename T3 >
164 struct IsEvaluationRequired {
165 enum :
bool { value = ( evaluateLeft || evaluateRight ) };
175 template<
typename T1,
typename T2,
typename T3 >
176 struct UseVectorizedKernel {
177 enum :
bool { value = useOptimizedKernels &&
179 T1::simdEnabled && T3::simdEnabled &&
196 template<
typename T1,
typename T2,
typename T3 >
197 struct UseOptimizedKernel {
198 enum :
bool { value = useOptimizedKernels &&
199 !UseVectorizedKernel<T1,T2,T3>::value &&
212 template<
typename T1,
typename T2,
typename T3 >
213 struct UseDefaultKernel {
214 enum :
bool { value = !UseVectorizedKernel<T1,T2,T3>::value &&
215 !UseOptimizedKernel<T1,T2,T3>::value };
273 enum :
bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
274 !evaluateRight && MT2::smpAssignable };
329 :(
lhs_.columns() ) ) );
333 const size_t n(
end - begin );
353 if( i >=
lhs_.rows() ) {
356 if( j >=
rhs_.columns() ) {
368 inline size_t rows() const noexcept {
379 return rhs_.columns();
409 template<
typename T >
410 inline bool canAlias(
const T* alias )
const noexcept {
411 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
421 template<
typename T >
422 inline bool isAliased(
const T* alias )
const noexcept {
423 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
433 return rhs_.isAligned();
466 template<
typename MT
485 SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
504 template<
typename MT3
508 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
516 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
518 const size_t jtmp(
min( jj+block, B.columns() ) );
520 for(
size_t i=0UL; i<A.rows(); ++i )
525 for( ; element!=
end; ++element )
527 const size_t i1( element->index() );
531 C(i,i1) = element->value() * B(i1,i1);
541 ?( ( SYM || HERM || LOW )
544 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
551 for(
size_t j=jbegin; j<jend; ++j ) {
553 C(i,j) = element->value() * B(i1,j);
555 C(i,j) += element->value() * B(i1,j);
563 for(
size_t i=0UL; i<A.rows(); ++i ) {
564 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
565 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
587 template<
typename MT3
591 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
599 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
601 const size_t jtmp(
min( jj+block, B.columns() ) );
603 for(
size_t i=0UL; i<A.rows(); ++i )
608 const size_t nonzeros( A.nonZeros(i) );
609 const size_t kpos( nonzeros &
size_t(-4) );
612 for(
size_t k=0UL; k<kpos; k+=4UL )
614 const size_t i1( element->index() );
615 const ET1 v1( element->value() );
617 const size_t i2( element->index() );
618 const ET1 v2( element->value() );
620 const size_t i3( element->index() );
621 const ET1 v3( element->value() );
623 const size_t i4( element->index() );
624 const ET1 v4( element->value() );
633 :( UPP ?
max(i,jj) : jj ) );
635 ?( ( SYM || HERM || LOW )
638 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
645 const size_t jnum( jend - jbegin );
646 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
649 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
650 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
651 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
652 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
653 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
655 for(
size_t j=jpos; j<jend; ++j ) {
656 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
660 for( ; element!=
end; ++element )
662 const size_t i1( element->index() );
663 const ET1 v1( element->value() );
669 :( UPP ?
max(i,jj) : jj ) );
671 ?( ( SYM || HERM || LOW )
674 :( SYM || HERM || LOW ?
min(i+1UL,jtmp) : jtmp ) );
681 const size_t jnum( jend - jbegin );
682 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
685 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
686 C(i,j ) += v1 * B(i1,j );
687 C(i,j+1UL) += v1 * B(i1,j+1UL);
688 C(i,j+2UL) += v1 * B(i1,j+2UL);
689 C(i,j+3UL) += v1 * B(i1,j+3UL);
691 for(
size_t j=jpos; j<jend; ++j ) {
692 C(i,j) += v1 * B(i1,j);
699 for(
size_t i=0UL; i<A.rows(); ++i ) {
700 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
701 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
723 template<
typename MT3
727 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
735 for(
size_t i=0UL; i<A.rows(); ++i )
740 const size_t nonzeros( A.nonZeros(i) );
741 const size_t kpos( nonzeros &
size_t(-4) );
744 for(
size_t k=0UL; k<kpos; k+=4UL )
746 const size_t i1( element->index() );
747 const ET1 v1( element->value() );
749 const size_t i2( element->index() );
750 const ET1 v2( element->value() );
752 const size_t i3( element->index() );
753 const ET1 v3( element->value() );
755 const size_t i4( element->index() );
756 const ET1 v4( element->value() );
768 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
769 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
770 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
773 ?( SYM || HERM || LOW ?
min(i+1UL,i4) : i4 )
774 :( SYM || HERM || LOW ?
min(i,i4)+1UL : i4+1UL ) )
775 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
778 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
779 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
783 for( ; j<jpos; j+=SIMDSIZE ) {
784 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
786 for( ; remainder && j<jend; ++j ) {
787 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
791 for( ; element!=
end; ++element )
793 const size_t i1( element->index() );
794 const ET1 v1( element->value() );
800 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
801 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
802 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
805 ?( SYM || HERM || LOW ?
min(i+1UL,i1) : i1 )
806 :( SYM || HERM || LOW ?
min(i,i1)+1UL : i1+1UL ) )
807 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
810 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
811 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
815 for( ; j<jpos; j+=SIMDSIZE ) {
816 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
818 for( ; remainder && j<jend; ++j ) {
819 C(i,j) += v1 * B(i1,j);
825 for(
size_t i=0UL; i<A.rows(); ++i ) {
826 for(
size_t j=i+1UL; j<B.columns(); ++j ) {
827 C(i,j) = HERM ?
conj( C(j,i) ) : C(j,i);
848 template<
typename MT
866 const ForwardFunctor fwd;
868 const TmpType tmp(
serial( rhs ) );
869 assign( ~lhs, fwd( tmp ) );
887 template<
typename MT
906 SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
925 template<
typename MT3
929 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
935 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
937 const size_t jtmp(
min( jj+block, B.columns() ) );
939 for(
size_t i=0UL; i<A.rows(); ++i )
944 for( ; element!=
end; ++element )
946 const size_t i1( element->index() );
950 C(i,i1) += element->value() * B(i1,i1);
963 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
970 const size_t jnum( jend - jbegin );
971 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
974 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
975 C(i,j ) += element->value() * B(i1,j );
976 C(i,j+1UL) += element->value() * B(i1,j+1UL);
977 C(i,j+2UL) += element->value() * B(i1,j+2UL);
978 C(i,j+3UL) += element->value() * B(i1,j+3UL);
980 for(
size_t j=jpos; j<jend; ++j ) {
981 C(i,j) += element->value() * B(i1,j);
1005 template<
typename MT3
1009 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1015 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1017 const size_t jtmp(
min( jj+block, B.columns() ) );
1019 for(
size_t i=0UL; i<A.rows(); ++i )
1024 const size_t nonzeros( A.nonZeros(i) );
1025 const size_t kpos( nonzeros &
size_t(-4) );
1028 for(
size_t k=0UL; k<kpos; k+=4UL )
1030 const size_t i1( element->index() );
1031 const ET1 v1( element->value() );
1033 const size_t i2( element->index() );
1034 const ET1 v2( element->value() );
1036 const size_t i3( element->index() );
1037 const ET1 v3( element->value() );
1039 const size_t i4( element->index() );
1040 const ET1 v4( element->value() );
1049 :( UPP ?
max(i,jj) : jj ) );
1054 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1061 const size_t jnum( jend - jbegin );
1062 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1065 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1066 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1067 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1068 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1069 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1071 for(
size_t j=jpos; j<jend; ++j ) {
1072 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1076 for( ; element!=
end; ++element )
1078 const size_t i1( element->index() );
1079 const ET1 v1( element->value() );
1085 :( UPP ?
max(i,jj) : jj ) );
1090 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1097 const size_t jnum( jend - jbegin );
1098 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1101 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1102 C(i,j ) += v1 * B(i1,j );
1103 C(i,j+1UL) += v1 * B(i1,j+1UL);
1104 C(i,j+2UL) += v1 * B(i1,j+2UL);
1105 C(i,j+3UL) += v1 * B(i1,j+3UL);
1107 for(
size_t j=jpos; j<jend; ++j ) {
1108 C(i,j) += v1 * B(i1,j);
1131 template<
typename MT3
1135 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1141 for(
size_t i=0UL; i<A.rows(); ++i )
1146 const size_t nonzeros( A.nonZeros(i) );
1147 const size_t kpos( nonzeros &
size_t(-4) );
1150 for(
size_t k=0UL; k<kpos; k+=4UL )
1152 const size_t i1( element->index() );
1153 const ET1 v1( element->value() );
1155 const size_t i2( element->index() );
1156 const ET1 v2( element->value() );
1158 const size_t i3( element->index() );
1159 const ET1 v3( element->value() );
1161 const size_t i4( element->index() );
1162 const ET1 v4( element->value() );
1174 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1175 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1176 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1179 ?( LOW ?
min(i+1UL,i4) : i4 )
1180 :( LOW ?
min(i,i4)+1UL : i4+1UL ) )
1181 :( LOW ? i+1UL : B.columns() ) );
1184 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1185 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1189 for( ; j<jpos; j+=SIMDSIZE ) {
1190 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1192 for( ; remainder && j<jend; ++j ) {
1193 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1197 for( ; element!=
end; ++element )
1199 const size_t i1( element->index() );
1200 const ET1 v1( element->value() );
1206 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1207 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1208 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1211 ?( LOW ?
min(i+1UL,i1) : i1 )
1212 :( LOW ?
min(i,i1)+1UL : i1+1UL ) )
1213 :( LOW ? i+1UL : B.columns() ) );
1216 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1217 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1221 for( ; j<jpos; j+=SIMDSIZE ) {
1222 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1224 for( ; remainder && j<jend; ++j ) {
1225 C(i,j) += v1 * B(i1,j);
1250 template<
typename MT
1269 SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1288 template<
typename MT3
1292 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1298 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1300 const size_t jtmp(
min( jj+block, B.columns() ) );
1302 for(
size_t i=0UL; i<A.rows(); ++i )
1307 for( ; element!=
end; ++element )
1309 const size_t i1( element->index() );
1313 C(i,i1) -= element->value() * B(i1,i1);
1326 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1333 const size_t jnum( jend - jbegin );
1334 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1337 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1338 C(i,j ) -= element->value() * B(i1,j );
1339 C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1340 C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1341 C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1343 for(
size_t j=jpos; j<jend; ++j ) {
1344 C(i,j) -= element->value() * B(i1,j);
1368 template<
typename MT3
1372 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1378 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1380 const size_t jtmp(
min( jj+block, B.columns() ) );
1382 for(
size_t i=0UL; i<A.rows(); ++i )
1387 const size_t nonzeros( A.nonZeros(i) );
1388 const size_t kpos( nonzeros &
size_t(-4) );
1391 for(
size_t k=0UL; k<kpos; k+=4UL )
1393 const size_t i1( element->index() );
1394 const ET1 v1( element->value() );
1396 const size_t i2( element->index() );
1397 const ET1 v2( element->value() );
1399 const size_t i3( element->index() );
1400 const ET1 v3( element->value() );
1402 const size_t i4( element->index() );
1403 const ET1 v4( element->value() );
1412 :( UPP ?
max(i,jj) : jj ) );
1417 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1424 const size_t jnum( jend - jbegin );
1425 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1428 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1429 C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1430 C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1431 C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1432 C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1434 for(
size_t j=jpos; j<jend; ++j ) {
1435 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1439 for( ; element!=
end; ++element )
1441 const size_t i1( element->index() );
1442 const ET1 v1( element->value() );
1448 :( UPP ?
max(i,jj) : jj ) );
1453 :( LOW ?
min(i+1UL,jtmp) : jtmp ) );
1460 const size_t jnum( jend - jbegin );
1461 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1464 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1465 C(i,j ) -= v1 * B(i1,j );
1466 C(i,j+1UL) -= v1 * B(i1,j+1UL);
1467 C(i,j+2UL) -= v1 * B(i1,j+2UL);
1468 C(i,j+3UL) -= v1 * B(i1,j+3UL);
1470 for(
size_t j=jpos; j<jend; ++j ) {
1471 C(i,j) -= v1 * B(i1,j);
1494 template<
typename MT3
1498 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1504 for(
size_t i=0UL; i<A.rows(); ++i )
1509 const size_t nonzeros( A.nonZeros(i) );
1510 const size_t kpos( nonzeros &
size_t(-4) );
1513 for(
size_t k=0UL; k<kpos; k+=4UL )
1515 const size_t i1( element->index() );
1516 const ET1 v1( element->value() );
1518 const size_t i2( element->index() );
1519 const ET1 v2( element->value() );
1521 const size_t i3( element->index() );
1522 const ET1 v3( element->value() );
1524 const size_t i4( element->index() );
1525 const ET1 v4( element->value() );
1537 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1538 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1539 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1542 ?( LOW ?
min(i+1UL,i4) : i4 )
1543 :( LOW ?
min(i,i4)+1UL : i4+1UL ) )
1544 :( LOW ? i+1UL : B.columns() ) );
1547 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1548 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1552 for( ; j<jpos; j+=SIMDSIZE ) {
1553 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1555 for( ; remainder && j<jend; ++j ) {
1556 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1560 for( ; element!=
end; ++element )
1562 const size_t i1( element->index() );
1563 const ET1 v1( element->value() );
1569 ?( ( UPP ?
max(i,i1+1UL) : i1+1UL ) &
size_t(-SIMDSIZE) )
1570 :( ( UPP ?
max(i,i1) : i1 ) &
size_t(-SIMDSIZE) ) )
1571 :( UPP ? ( i &
size_t(-SIMDSIZE) ) : 0UL ) );
1574 ?( LOW ?
min(i+1UL,i1) : i1 )
1575 :( LOW ?
min(i,i1)+1UL : i1+1UL ) )
1576 :( LOW ? i+1UL : B.columns() ) );
1579 const size_t jpos( remainder ? ( jend &
size_t(-SIMDSIZE) ) : jend );
1580 BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos,
"Invalid end calculation" );
1584 for( ; j<jpos; j+=SIMDSIZE ) {
1585 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1587 for( ; remainder && j<jend; ++j ) {
1588 C(i,j) -= v1 * B(i1,j);
1613 template<
typename MT
1627 schurAssign( ~lhs, tmp );
1659 template<
typename MT
1699 template<
typename MT
1718 const ForwardFunctor fwd;
1720 const TmpType tmp( rhs );
1742 template<
typename MT
1787 template<
typename MT
1829 template<
typename MT
1910 template<
typename MT1
1912 inline decltype(
auto)
1959 template<
typename MT1
1974 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2004 template<
typename MT1
2019 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2049 template<
typename MT1
2064 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2094 template<
typename MT1
2109 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2139 template<
typename MT1
2154 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2170 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2171 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2172 :
public Size<MT1,0UL>
2175 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2176 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2177 :
public Size<MT2,1UL>
2193 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2194 struct IsAligned< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2211 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2212 struct IsSymmetric< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2213 :
public Or< Bool<SF>
2215 , IsBuiltin< ElementType_< SMatDMatMultExpr<MT1,MT2,false,true,false,false> > > >
2216 , And< Bool<LF>, Bool<UF> > >
2232 template<
typename MT1,
typename MT2,
bool SF,
bool LF,
bool UF >
2233 struct IsHermitian< SMatDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
2250 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2251 struct IsLower< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2252 :
public Or< Bool<LF>
2253 , And< IsLower<MT1>, IsLower<MT2> >
2254 , And< Or< Bool<SF>, Bool<HF> >
2255 , IsUpper<MT1>, IsUpper<MT2> > >
2271 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2272 struct IsUniLower< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2273 :
public Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
2274 , And< Or< Bool<SF>, Bool<HF> >
2275 , IsUniUpper<MT1>, IsUniUpper<MT2> > >
2291 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2293 :
public Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2294 , And< IsStrictlyLower<MT2>, IsLower<MT1> >
2295 , And< Or< Bool<SF>, Bool<HF> >
2296 , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2297 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >
2313 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2314 struct IsUpper< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2315 :
public Or< Bool<UF>
2316 , And< IsUpper<MT1>, IsUpper<MT2> >
2317 , And< Or< Bool<SF>, Bool<HF> >
2318 , IsLower<MT1>, IsLower<MT2> > >
2334 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2335 struct IsUniUpper< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2336 :
public Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
2337 , And< Or< Bool<SF>, Bool<HF> >
2338 , IsUniLower<MT1>, IsUniLower<MT2> > >
2354 template<
typename MT1,
typename MT2,
bool SF,
bool HF,
bool LF,
bool UF >
2356 :
public Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
2357 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
2358 , And< Or< Bool<SF>, Bool<HF> >
2359 , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
2360 , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:329
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:442
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:131
Header file for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:71
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:996
Header file for the IsUniUpper type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:196
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:86
Header file for basic type definitions.
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:134
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:132
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:164
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Generic wrapper for a compile time constant integral value. The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:364
Availability of a SIMD multiplication for the given data types. Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:288
ResultType_< MT1 > RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:129
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:588
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1903
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:87
Availability of a SIMD addition for the given data types. Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:398
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1026
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template. The MultTrait_ alias declaration provide...
Definition: MultTrait.h:291
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for sparse matrix-dense matrix multiplications. The SMatDMatMultExpr class represent...
Definition: Forward.h:107
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:87
ElementType_< ResultType > ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:247
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:262
Compile time check for data types. This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions. The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1950
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:80
Flag for Hermitian matrices.
Definition: SMatDMatMultExpr.h:151
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:129
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template. The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Compile time check for the alignment of data types. This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Constraint on the data type.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:71
LeftOperand leftOperand() const noexcept
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:388
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions. The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:410
Compile time check for upper unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Header file for the generic max algorithm.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
ElementType_< RT1 > ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:131
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:245
Header file for the If class template.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:110
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:130
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding. This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:352
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:3085
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions. The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1026
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:89
Generic wrapper for the null function.
Definition: Noop.h:59
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:430
Header file for the DeclDiag functor.
Constraint on the data type.
Header file for all forward declarations for expression class templates.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:303
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:107
Flag for upper matrices.
Definition: SMatDMatMultExpr.h:153
Compile time check for lower unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
Header file for the conjugate shim.
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:244
Compile time check for resizable data types. This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
Header file for the IsSIMDCombinable type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:378
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:368
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:449
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:250
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template. The If_ alias declaration provides a convenient...
Definition: If.h:154
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:131
Header file for the reset shim.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1028
Compile time check for Hermitian matrices. This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: SMatDMatMultExpr.h:248
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:422
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:816
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type. In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
Header file for the RemoveReference type trait.
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:263
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
CompositeType_< MT1 > CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:133
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3080
typename T::ConstIterator ConstIterator_
Alias declaration for nested ConstIterator type definitions. The ConstIterator_ alias declaration prov...
Definition: Aliases.h:103
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:432
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1028
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:246
Compile time logical 'or' evaluation. The Or alias declaration performs at compile time a logical 'or'...
Definition: Or.h:76
Flag for lower matrices.
Definition: SMatDMatMultExpr.h:152
Compile time evaluation of the size of vectors and matrices. The Size type trait evaluates the size of...
Definition: Size.h:80
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:259
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:256
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:253
Header file for the DeclHerm functor.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:249
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:628
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1321
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:908
Header file for the IsResizable type trait.
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
Flag for symmetric matrices.
Definition: SMatDMatMultExpr.h:150
Header file for the IsExpression type trait class.
Header file for the function trace functionality.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:450