35 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
122 template<
typename MT1
124 class SMatDMatMultExpr :
public DenseMatrix< SMatDMatMultExpr<MT1,MT2>, false >
125 ,
private MatMatMultExpr
126 ,
private Computation
154 template<
typename T1,
typename T2,
typename T3 >
155 struct IsEvaluationRequired {
156 enum { value = ( evaluateLeft || evaluateRight ) };
166 template<
typename T1,
typename T2,
typename T3 >
167 struct UseVectorizedKernel {
169 !IsDiagonal<T3>::value &&
170 T1::vectorizable && T3::vectorizable &&
171 IsRowMajorMatrix<T1>::value &&
172 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
173 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
174 IntrinsicTrait<typename T1::ElementType>::addition &&
175 IntrinsicTrait<typename T1::ElementType>::subtraction &&
176 IntrinsicTrait<typename T1::ElementType>::multiplication };
187 template<
typename T1,
typename T2,
typename T3 >
188 struct UseOptimizedKernel {
190 !UseVectorizedKernel<T1,T2,T3>::value &&
191 !IsDiagonal<T3>::value &&
192 !IsResizable<typename T1::ElementType>::value &&
193 !IsResizable<ET1>::value };
203 template<
typename T1,
typename T2,
typename T3 >
204 struct UseDefaultKernel {
205 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
206 !UseOptimizedKernel<T1,T2,T3>::value };
244 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
245 !evaluateRight && MT2::smpAssignable };
278 if(
lhs_.columns() == 0UL )
293 if( element != end ) {
294 tmp = element->value() *
rhs_(element->index(),j);
296 for( ; element!=
end; ++element ) {
297 tmp += element->value() *
rhs_(element->index(),j);
320 :(
lhs_.columns() ) ) );
323 tmp =
lhs_(i,kbegin) *
rhs_(kbegin,j);
324 for(
size_t k=kbegin+1UL; k<kend; ++k ) {
342 inline ReturnType
at(
size_t i,
size_t j )
const {
343 if( i >=
lhs_.rows() ) {
346 if( j >=
rhs_.columns() ) {
369 return rhs_.columns();
399 template<
typename T >
401 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
411 template<
typename T >
413 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
423 return rhs_.isAligned();
433 return (
rows() > SMP_SMATDMATMULT_THRESHOLD );
456 template<
typename MT
465 LT A(
serial( rhs.lhs_ ) );
466 RT B(
serial( rhs.rhs_ ) );
475 SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
494 template<
typename MT3
498 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
506 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
508 const size_t jtmp(
min( jj+block, B.columns() ) );
510 for(
size_t i=0UL; i<A.rows(); ++i )
512 ConstIterator element( A.begin(i) );
513 const ConstIterator
end( A.end(i) );
515 for( ; element!=
end; ++element )
517 const size_t i1( element->index() );
521 C(i,i1) = element->value() * B(i1,i1);
525 const size_t jbegin( ( IsUpper<MT5>::value )
526 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
528 const size_t jend( ( IsLower<MT5>::value )
529 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
532 if( IsTriangular<MT5>::value && jbegin >= jend )
537 for(
size_t j=jbegin; j<jend; ++j ) {
539 C(i,j) = element->value() * B(i1,j);
541 C(i,j) += element->value() * B(i1,j);
565 template<
typename MT3
568 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
569 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
573 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 64UL );
577 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
579 const size_t jtmp(
min( jj+block, B.columns() ) );
581 for(
size_t i=0UL; i<A.rows(); ++i )
583 const ConstIterator
end( A.end(i) );
584 ConstIterator element( A.begin(i) );
586 const size_t nonzeros( A.nonZeros(i) );
587 const size_t kpos( nonzeros &
size_t(-4) );
590 for(
size_t k=0UL; k<kpos; k+=4UL )
592 const size_t i1( element->index() );
593 const ET1 v1( element->value() );
595 const size_t i2( element->index() );
596 const ET1 v2( element->value() );
598 const size_t i3( element->index() );
599 const ET1 v3( element->value() );
601 const size_t i4( element->index() );
602 const ET1 v4( element->value() );
607 const size_t jbegin( ( IsUpper<MT5>::value )
608 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
610 const size_t jend( ( IsLower<MT5>::value )
611 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i4 : i4+1UL ) ) )
614 if( IsTriangular<MT5>::value && jbegin >= jend )
619 const size_t jnum( jend - jbegin );
620 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
623 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
624 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
625 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
626 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
627 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
629 for(
size_t j=jpos; j<jend; ++j ) {
630 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
634 for( ; element!=
end; ++element )
636 const size_t i1( element->index() );
637 const ET1 v1( element->value() );
639 const size_t jbegin( ( IsUpper<MT5>::value )
640 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
642 const size_t jend( ( IsLower<MT5>::value )
643 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
646 if( IsTriangular<MT5>::value && jbegin >= jend )
651 const size_t jnum( jend - jbegin );
652 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
655 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
656 C(i,j ) += v1 * B(i1,j );
657 C(i,j+1UL) += v1 * B(i1,j+1UL);
658 C(i,j+2UL) += v1 * B(i1,j+2UL);
659 C(i,j+3UL) += v1 * B(i1,j+3UL);
661 for(
size_t j=jpos; j<jend; ++j ) {
662 C(i,j) += v1 * B(i1,j);
685 template<
typename MT3
688 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
689 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
691 typedef IntrinsicTrait<ElementType> IT;
694 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
698 for(
size_t i=0UL; i<A.rows(); ++i )
700 const ConstIterator
end( A.end(i) );
701 ConstIterator element( A.begin(i) );
703 const size_t nonzeros( A.nonZeros(i) );
704 const size_t kpos( nonzeros &
size_t(-4) );
707 for(
size_t k=0UL; k<kpos; k+=4UL )
709 const size_t i1( element->index() );
710 const ET1 v1( element->value() );
712 const size_t i2( element->index() );
713 const ET1 v2( element->value() );
715 const size_t i3( element->index() );
716 const ET1 v3( element->value() );
718 const size_t i4( element->index() );
719 const ET1 v4( element->value() );
724 const IntrinsicType xmm1(
set( v1 ) );
725 const IntrinsicType xmm2(
set( v2 ) );
726 const IntrinsicType xmm3(
set( v3 ) );
727 const IntrinsicType xmm4(
set( v4 ) );
729 const size_t jbegin( ( IsUpper<MT5>::value )
730 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
732 const size_t jend( ( IsLower<MT5>::value )
733 ?( IsStrictlyLower<MT5>::value ? i4 : i4+1UL )
737 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
743 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
745 for( ; remainder && j<jend; ++j ) {
746 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
750 for( ; element!=
end; ++element )
752 const size_t i1( element->index() );
753 const ET1 v1( element->value() );
755 const IntrinsicType xmm1(
set( v1 ) );
757 const size_t jbegin( ( IsUpper<MT5>::value )
758 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
760 const size_t jend( ( IsLower<MT5>::value )
761 ?( IsStrictlyLower<MT5>::value ? i1 : i1+1UL )
765 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
771 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
773 for( ; remainder && j<jend; ++j ) {
774 C(i,j) += v1 * B(i1,j);
795 template<
typename MT
797 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const SMatDMatMultExpr& rhs )
801 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
813 const TmpType tmp(
serial( rhs ) );
832 template<
typename MT
834 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const SMatDMatMultExpr& rhs )
841 LT A(
serial( rhs.lhs_ ) );
842 RT B(
serial( rhs.rhs_ ) );
851 SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
870 template<
typename MT3
873 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
874 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
878 const size_t block( Or< IsRowMajorMatrix<MT3>, IsDiagonal<MT5> >::value ? B.columns() : 64UL );
880 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
882 const size_t jtmp(
min( jj+block, B.columns() ) );
884 for(
size_t i=0UL; i<A.rows(); ++i )
886 const ConstIterator
end( A.end(i) );
887 ConstIterator element( A.begin(i) );
889 for( ; element!=
end; ++element )
891 const size_t i1( element->index() );
893 if( IsDiagonal<MT5>::value )
895 C(i,i1) += element->value() * B(i1,i1);
899 const size_t jbegin( ( IsUpper<MT5>::value )
900 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
902 const size_t jend( ( IsLower<MT5>::value )
903 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
906 if( IsTriangular<MT5>::value && jbegin >= jend )
911 const size_t jnum( jend - jbegin );
912 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
915 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
916 C(i,j ) += element->value() * B(i1,j );
917 C(i,j+1UL) += element->value() * B(i1,j+1UL);
918 C(i,j+2UL) += element->value() * B(i1,j+2UL);
919 C(i,j+3UL) += element->value() * B(i1,j+3UL);
921 for(
size_t j=jpos; j<jend; ++j ) {
922 C(i,j) += element->value() * B(i1,j);
946 template<
typename MT3
949 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
950 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
954 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 64UL );
956 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
958 const size_t jtmp(
min( jj+block, B.columns() ) );
960 for(
size_t i=0UL; i<A.rows(); ++i )
962 const ConstIterator
end( A.end(i) );
963 ConstIterator element( A.begin(i) );
965 const size_t nonzeros( A.nonZeros(i) );
966 const size_t kpos( nonzeros &
size_t(-4) );
969 for(
size_t k=0UL; k<kpos; k+=4UL )
971 const size_t i1( element->index() );
972 const ET1 v1( element->value() );
974 const size_t i2( element->index() );
975 const ET1 v2( element->value() );
977 const size_t i3( element->index() );
978 const ET1 v3( element->value() );
980 const size_t i4( element->index() );
981 const ET1 v4( element->value() );
986 const size_t jbegin( ( IsUpper<MT5>::value )
987 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
989 const size_t jend( ( IsLower<MT5>::value )
990 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i4 : i4+1UL ) ) )
993 if( IsTriangular<MT5>::value && jbegin >= jend )
998 const size_t jnum( jend - jbegin );
999 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1002 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1003 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1004 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1005 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1006 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1008 for(
size_t j=jpos; j<jend; ++j ) {
1009 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1013 for( ; element!=
end; ++element )
1015 const size_t i1( element->index() );
1016 const ET1 v1( element->value() );
1018 const size_t jbegin( ( IsUpper<MT5>::value )
1019 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
1021 const size_t jend( ( IsLower<MT5>::value )
1022 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
1025 if( IsTriangular<MT5>::value && jbegin >= jend )
1030 const size_t jnum( jend - jbegin );
1031 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1034 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1035 C(i,j ) += v1 * B(i1,j );
1036 C(i,j+1UL) += v1 * B(i1,j+1UL);
1037 C(i,j+2UL) += v1 * B(i1,j+2UL);
1038 C(i,j+3UL) += v1 * B(i1,j+3UL);
1040 for(
size_t j=jpos; j<jend; ++j ) {
1041 C(i,j) += v1 * B(i1,j);
1064 template<
typename MT3
1067 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1068 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1070 typedef IntrinsicTrait<ElementType> IT;
1073 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
1075 for(
size_t i=0UL; i<A.rows(); ++i )
1077 const ConstIterator
end( A.end(i) );
1078 ConstIterator element( A.begin(i) );
1080 const size_t nonzeros( A.nonZeros(i) );
1081 const size_t kpos( nonzeros &
size_t(-4) );
1084 for(
size_t k=0UL; k<kpos; k+=4UL )
1086 const size_t i1( element->index() );
1087 const ET1 v1( element->value() );
1089 const size_t i2( element->index() );
1090 const ET1 v2( element->value() );
1092 const size_t i3( element->index() );
1093 const ET1 v3( element->value() );
1095 const size_t i4( element->index() );
1096 const ET1 v4( element->value() );
1101 const IntrinsicType xmm1(
set( v1 ) );
1102 const IntrinsicType xmm2(
set( v2 ) );
1103 const IntrinsicType xmm3(
set( v3 ) );
1104 const IntrinsicType xmm4(
set( v4 ) );
1106 const size_t jbegin( ( IsUpper<MT5>::value )
1107 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
1109 const size_t jend( ( IsLower<MT5>::value )
1110 ?( IsStrictlyLower<MT5>::value ? i4 : i4+1UL )
1114 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
1120 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1122 for( ; remainder && j<jend; ++j ) {
1123 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1127 for( ; element!=
end; ++element )
1129 const size_t i1( element->index() );
1130 const ET1 v1( element->value() );
1132 const IntrinsicType xmm1(
set( v1 ) );
1134 const size_t jbegin( ( IsUpper<MT5>::value )
1135 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
1137 const size_t jend( ( IsLower<MT5>::value )
1138 ?( IsStrictlyLower<MT5>::value ? i1 : i1+1UL )
1142 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
1148 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1150 for( ; remainder && j<jend; ++j ) {
1151 C(i,j) += v1 * B(i1,j);
1176 template<
typename MT
1178 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const SMatDMatMultExpr& rhs )
1185 LT A(
serial( rhs.lhs_ ) );
1186 RT B(
serial( rhs.rhs_ ) );
1195 SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1214 template<
typename MT3
1217 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1218 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1222 const size_t block( Or< IsRowMajorMatrix<MT3>, IsDiagonal<MT5> >::value ? B.columns() : 64UL );
1224 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1226 const size_t jtmp(
min( jj+block, B.columns() ) );
1228 for(
size_t i=0UL; i<A.rows(); ++i )
1230 const ConstIterator
end( A.end(i) );
1231 ConstIterator element( A.begin(i) );
1233 for( ; element!=
end; ++element )
1235 const size_t i1( element->index() );
1237 if( IsDiagonal<MT5>::value )
1239 C(i,i1) -= element->value() * B(i1,i1);
1243 const size_t jbegin( ( IsUpper<MT5>::value )
1244 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
1246 const size_t jend( ( IsLower<MT5>::value )
1247 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
1250 if( IsTriangular<MT5>::value && jbegin >= jend )
1255 const size_t jnum( jend - jbegin );
1256 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1259 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1260 C(i,j ) -= element->value() * B(i1,j );
1261 C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1262 C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1263 C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1265 for(
size_t j=jpos; j<jend; ++j ) {
1266 C(i,j) -= element->value() * B(i1,j);
1290 template<
typename MT3
1293 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1294 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1298 const size_t block( IsRowMajorMatrix<MT3>::value ? B.columns() : 64UL );
1300 for(
size_t jj=0UL; jj<B.columns(); jj+=block )
1302 const size_t jtmp(
min( jj+block, B.columns() ) );
1304 for(
size_t i=0UL; i<A.rows(); ++i )
1306 const ConstIterator
end( A.end(i) );
1307 ConstIterator element( A.begin(i) );
1309 const size_t nonzeros( A.nonZeros(i) );
1310 const size_t kpos( nonzeros &
size_t(-4) );
1313 for(
size_t k=0UL; k<kpos; k+=4UL )
1315 const size_t i1( element->index() );
1316 const ET1 v1( element->value() );
1318 const size_t i2( element->index() );
1319 const ET1 v2( element->value() );
1321 const size_t i3( element->index() );
1322 const ET1 v3( element->value() );
1324 const size_t i4( element->index() );
1325 const ET1 v4( element->value() );
1330 const size_t jbegin( ( IsUpper<MT5>::value )
1331 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
1333 const size_t jend( ( IsLower<MT5>::value )
1334 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i4 : i4+1UL ) ) )
1337 if( IsTriangular<MT5>::value && jbegin >= jend )
1342 const size_t jnum( jend - jbegin );
1343 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1346 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1347 C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1348 C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1349 C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1350 C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1352 for(
size_t j=jpos; j<jend; ++j ) {
1353 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1357 for( ; element!=
end; ++element )
1359 const size_t i1( element->index() );
1360 const ET1 v1( element->value() );
1362 const size_t jbegin( ( IsUpper<MT5>::value )
1363 ?(
max( jj, ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) ) )
1365 const size_t jend( ( IsLower<MT5>::value )
1366 ?(
min( jtmp, ( IsStrictlyLower<MT5>::value ? i1 : i1+1UL ) ) )
1369 if( IsTriangular<MT5>::value && jbegin >= jend )
1374 const size_t jnum( jend - jbegin );
1375 const size_t jpos( jbegin + ( jnum &
size_t(-4) ) );
1378 for(
size_t j=jbegin; j<jpos; j+=4UL ) {
1379 C(i,j ) -= v1 * B(i1,j );
1380 C(i,j+1UL) -= v1 * B(i1,j+1UL);
1381 C(i,j+2UL) -= v1 * B(i1,j+2UL);
1382 C(i,j+3UL) -= v1 * B(i1,j+3UL);
1384 for(
size_t j=jpos; j<jend; ++j ) {
1385 C(i,j) -= v1 * B(i1,j);
1408 template<
typename MT3
1411 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1412 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1414 typedef IntrinsicTrait<ElementType> IT;
1417 const bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
1419 for(
size_t i=0UL; i<A.rows(); ++i )
1421 const ConstIterator
end( A.end(i) );
1422 ConstIterator element( A.begin(i) );
1424 const size_t nonzeros( A.nonZeros(i) );
1425 const size_t kpos( nonzeros &
size_t(-4) );
1428 for(
size_t k=0UL; k<kpos; k+=4UL )
1430 const size_t i1( element->index() );
1431 const ET1 v1( element->value() );
1433 const size_t i2( element->index() );
1434 const ET1 v2( element->value() );
1436 const size_t i3( element->index() );
1437 const ET1 v3( element->value() );
1439 const size_t i4( element->index() );
1440 const ET1 v4( element->value() );
1445 const IntrinsicType xmm1(
set( v1 ) );
1446 const IntrinsicType xmm2(
set( v2 ) );
1447 const IntrinsicType xmm3(
set( v3 ) );
1448 const IntrinsicType xmm4(
set( v4 ) );
1450 const size_t jbegin( ( IsUpper<MT5>::value )
1451 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
1453 const size_t jend( ( IsLower<MT5>::value )
1454 ?( IsStrictlyLower<MT5>::value ? i4 : i4+1UL )
1458 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
1464 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1466 for( ; remainder && j<jend; ++j ) {
1467 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1471 for( ; element!=
end; ++element )
1473 const size_t i1( element->index() );
1474 const ET1 v1( element->value() );
1476 const IntrinsicType xmm1(
set( v1 ) );
1478 const size_t jbegin( ( IsUpper<MT5>::value )
1479 ?( ( IsStrictlyUpper<MT5>::value ? i1+1UL : i1 ) &
size_t(-
IT::size) )
1481 const size_t jend( ( IsLower<MT5>::value )
1482 ?( IsStrictlyLower<MT5>::value ? i1 : i1+1UL )
1486 const size_t jpos( remainder ? ( jend &
size_t(-
IT::size) ) : jend );
1492 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1494 for( ; remainder && j<jend; ++j ) {
1495 C(i,j) -= v1 * B(i1,j);
1530 template<
typename MT
1532 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1570 template<
typename MT
1572 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1577 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1589 const TmpType tmp( rhs );
1611 template<
typename MT
1613 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1656 template<
typename MT
1658 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
1743 template<
typename T1
1745 inline const SMatDMatMultExpr<T1,T2>
1769 template<
typename MT1,
typename MT2 >
1786 template<
typename MT1,
typename MT2 >
1803 template<
typename MT1,
typename MT2 >
1804 struct IsAligned<
SMatDMatMultExpr<MT1,MT2> > :
public IsTrue< IsAligned<MT2>::value >
1820 template<
typename MT1,
typename MT2 >
1822 :
public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
1838 template<
typename MT1,
typename MT2 >
1840 :
public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
1856 template<
typename MT1,
typename MT2 >
1858 :
public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
1859 , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
1875 template<
typename MT1,
typename MT2 >
1877 :
public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
1893 template<
typename MT1,
typename MT2 >
1895 :
public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
1911 template<
typename MT1,
typename MT2 >
1913 :
public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
1914 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
1930 template<
typename MT1,
typename MT2,
typename VT >
1935 typedef typename SelectType< IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1936 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
1937 IsDenseVector<VT>::value && IsColumnVector<VT>::value
1938 ,
typename SMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
1939 , INVALID_TYPE >::Type Type;
1948 template<
typename MT1,
typename MT2,
typename VT >
1953 typedef typename SelectType< IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1954 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
1955 IsSparseVector<VT>::value && IsColumnVector<VT>::value
1956 ,
typename SMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
1957 , INVALID_TYPE >::Type Type;
1966 template<
typename VT,
typename MT1,
typename MT2 >
1971 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
1972 IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1973 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
1974 ,
typename TDVecDMatMultExprTrait< typename TDVecSMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1975 , INVALID_TYPE >::Type Type;
1984 template<
typename VT,
typename MT1,
typename MT2 >
1989 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
1990 IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1991 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
1992 ,
typename TSVecDMatMultExprTrait< typename TSVecSMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1993 , INVALID_TYPE >::Type Type;
2002 template<
typename MT1,
typename MT2,
bool AF >
2007 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
2008 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
2017 template<
typename MT1,
typename MT2 >
2022 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
2031 template<
typename MT1,
typename MT2 >
2036 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:439
Header file for the SMatDVecMultExprTrait class template.
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:7820
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: SMatDMatMultExpr.h:218
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:232
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:422
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:250
Header file for the IsSparseMatrix type trait.
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:223
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:254
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
Header file for the And class template.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for sparse matrix-dense matrix multiplications. The SMatDMatMultExpr class represent...
Definition: Forward.h:97
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:216
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Header file for the TSVecSMatMultExprTrait class template.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:229
Header file for the IsUniLower type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:117
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:547
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
RT1::ElementType ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:132
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:110
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:79
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:269
const Element * ConstIterator
Iterator over constant elements.
Definition: CompressedMatrix.h:2592
Header file for the Or class template.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the DMatDVecMultExprTrait class template.
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
ResultType::ElementType ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:217
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:412
Constraints on the storage order of matrix types.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:226
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:214
MT1::CompositeType CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:134
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
LeftOperand leftOperand() const
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:378
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:342
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:79
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:219
Removal of reference modifiers. The RemoveReference type trait removes any reference modifiers from the given...
Definition: RemoveReference.h:69
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:135
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:138
size_t rows() const
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:358
Substitution Failure Is Not An Error (SFINAE) class. The EnableIf class template is an auxiliary tool ...
Definition: EnableIf.h:184
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Header file for the reset shim.
SMatDMatMultExpr< MT1, MT2 > This
Type of this SMatDMatMultExpr instance.
Definition: SMatDMatMultExpr.h:213
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:432
Constraints on the storage order of matrix types.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:131
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:440
Header file for the RemoveReference type trait.
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:122
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
MT1::ResultType RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:130
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
Header file for the TDVecDMatMultExprTrait class template.
Compile time logical or evaluation. The Or class template performs at compile time a logical or ('||')...
Definition: Or.h:78
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
Header file for the IsTrue value trait.
Header file for the TSVecDMatMultExprTrait class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:400
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:220
Header file for the IsUpper type trait.
Header file for exception macros.
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:215
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:388
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:133
Header file for the IsResizable type trait.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:368
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:79
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.