22 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
// Expression class for a matrix/matrix product. It derives publicly from
// DenseMatrix (with `false` as second template argument) and privately from
// MatMatMultExpr.
86 template<
typename MT1
88 class SMatDMatMultExpr :
public DenseMatrix< SMatDMatMultExpr<MT1,MT2>, false >
89 ,
private MatMatMultExpr
// Short aliases for the nested types of the left (MT1) and right (MT2) operand:
// RT = ResultType, ET = ElementType, CT = CompositeType.
94 typedef typename MT1::ResultType
RT1;
95 typedef typename MT2::ResultType
RT2;
96 typedef typename MT1::ElementType
ET1;
97 typedef typename MT2::ElementType
ET2;
98 typedef typename MT1::CompositeType
CT1;
99 typedef typename MT2::CompositeType
CT2;
// Compile-time predicate used to enable the vectorized kernels.
// `value` requires at least T1::vectorizable and T3::vectorizable; the
// condition continues with further terms after the trailing `&&`.
107 template<
typename T1,
typename T2,
typename T3 >
108 struct UseVectorizedKernel {
109 enum { value = T1::vectorizable && T3::vectorizable &&
// Compile-time predicate used to enable the optimized (unrolled) kernels.
// `value` is false whenever UseVectorizedKernel applies to the same types;
// the condition continues with further terms after the trailing `&&`.
126 template<
typename T1,
typename T2,
typename T3 >
127 struct UseOptimizedKernel {
128 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
// Compile-time predicate used to enable the default kernels.
// `value` is true exactly when neither UseVectorizedKernel nor
// UseOptimizedKernel holds for the same type triple, so the three
// predicates are mutually exclusive.
140 template<
typename T1,
typename T2,
typename T3 >
141 struct UseDefaultKernel {
142 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
143 !UseOptimizedKernel<T1,T2,T3>::value };
// Compile-time flag of this expression type: it reports itself as not vectorizable.
174 enum { vectorizable = 0 };
// Computation of a single product entry into `tmp`.
// Guard for a left-hand operand that has no columns.
207 if(
lhs_.columns() == 0 )
// Iterator range over the stored elements of A at index i.
215 const ConstIterator end( A.end(i) );
216 ConstIterator element( A.begin(i) );
// Seed tmp with one product value(A) * rhs_(index(A), j) ...
223 tmp = element->value() *
rhs_(element->index(),j);
// ... and add the products of the remaining stored elements.
225 for( ; element!=end; ++element )
226 tmp += element->value() *
rhs_(element->index(),j);
// Alternative form: index-based loop over k = 1 .. lhs_.columns()-1.
232 for(
size_t k=1; k<
lhs_.columns(); ++k ) {
// The column count is taken from the right-hand side operand.
257 return rhs_.columns();
// Alias check: true if either operand reports being aliased with `alias`.
287 template<
typename T >
289 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Second alias check with the same rule: true if either operand reports
// being aliased with `alias`.
299 template<
typename T >
301 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Assignment entry point: forwards the target (~lhs) and the operands A and B
// to the selectAssignKernel overload that is enabled for their types.
323 template<
typename MT
342 SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Straightforward assignment kernel: computes every entry C(i,j) separately.
361 template<
typename MT3
// Outer loop over the rows of A.
369 for(
size_t i=0UL; i<A.rows(); ++i )
371 const ConstIterator end( A.end(i) );
// Inner loop over the columns of B.
373 for(
size_t j=0UL; j<B.columns(); ++j )
// Restart at the first stored element of A for index i for each column.
375 ConstIterator element( A.begin(i) );
// If there is at least one stored element, its product initializes C(i,j) ...
377 if( element != end ) {
378 (~C)(i,j) = element->value() * B(element->index(),j);
// ... and the products of the remaining elements are accumulated.
380 for( ; element!=end; ++element ) {
381 (~C)(i,j) += element->value() * B(element->index(),j);
// Assignment kernel enabled when UseOptimizedKernel holds for (MT3,MT4,MT5).
// It processes the stored elements of A four at a time and the columns of B
// four at a time, with remainder loops for both.
// NOTE(review): all visible updates of C use `+=`, so this relies on C being
// cleared beforehand -- confirm.
407 template<
typename MT3
410 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
411 selectAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
413 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// size_t(-4) has all bits set except the two lowest, so the AND rounds the
// column count down to a multiple of 4.
415 const size_t jend( B.columns() & size_t(-4) );
420 for(
size_t i=0UL; i<A.rows(); ++i )
422 const ConstIterator end( A.end(i) );
423 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
425 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: capture index and value of four stored elements.
427 for(
size_t k=0UL; k<kend; k+=4UL ) {
428 const size_t i1( element->index() );
429 const ET1 v1( element->value() );
431 const size_t i2( element->index() );
432 const ET1 v2( element->value() );
434 const size_t i3( element->index() );
435 const ET1 v3( element->value() );
437 const size_t i4( element->index() );
438 const ET1 v4( element->value() );
// Add the contribution of the four elements to four columns of C per step.
441 for(
size_t j=0UL; j<jend; j+=4UL ) {
442 (~C)(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
443 (~C)(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
444 (~C)(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
445 (~C)(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
// Remaining columns (fewer than four).
447 for(
size_t j=jend; j<B.columns(); ++j ) {
448 (~C)(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
// Remaining stored elements (fewer than four), handled one at a time.
452 for( ; element!=end; ++element ) {
453 for(
size_t j=0UL; j<jend; j+=4UL ) {
454 (~C)(i,j ) += element->value() * B(element->index(),j );
455 (~C)(i,j+1UL) += element->value() * B(element->index(),j+1UL);
456 (~C)(i,j+2UL) += element->value() * B(element->index(),j+2UL);
457 (~C)(i,j+3UL) += element->value() * B(element->index(),j+3UL);
459 for(
size_t j=jend; j<B.columns(); ++j ) {
460 (~C)(i,j) += element->value() * B(element->index(),j);
// Assignment kernel enabled when UseVectorizedKernel holds for (MT3,MT4,MT5).
// It updates C via load/store and reads B via B.get(), advancing the column
// index by IT::size per step.
// NOTE(review): v1..v4 are not declared in the visible lines; presumably they
// hold the values of the current stored elements in a form that can be
// multiplied with B.get() -- confirm.
// NOTE(review): the j loops have no scalar remainder handling; presumably the
// storage of C and B is padded to a multiple of IT::size -- confirm.
482 template<
typename MT3
485 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
486 selectAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
488 typedef IntrinsicTrait<ElementType> IT;
489 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
491 const size_t N( B.columns() );
495 for(
size_t i=0UL; i<A.rows(); ++i )
497 const ConstIterator end( A.end(i) );
498 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
500 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: four stored elements per iteration.
502 for(
size_t k=0UL; k<kend; k+=4UL ) {
503 const size_t i1( element->index() );
506 const size_t i2( element->index() );
509 const size_t i3( element->index() );
512 const size_t i4( element->index() );
// Read-modify-write of C(i, j...) adding the four contributions.
516 for(
size_t j=0UL; j<N; j+=IT::size ) {
517 store( &(~C)(i,j),
load( &(~C)(i,j) ) + v1 * B.get(i1,j) + v2 * B.get(i2,j) + v3 * B.get(i3,j) + v4 * B.get(i4,j) );
// Remaining stored elements, one at a time.
521 for( ; element!=end; ++element ) {
522 const size_t i1( element->index() );
525 for(
size_t j=0UL; j<N; j+=IT::size ) {
526 store( &(~C)(i,j),
load( &(~C)(i,j) ) + v1 * B.get(i1,j) );
// Assignment kernel for a target of type DenseMatrix<MT3,true>
// (presumably the column-major case -- confirm).
// The columns of the target are traversed in the outer loop, four at a time,
// followed by a loop over the leftover columns.
548 template<
typename MT3
552 selectAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
554 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// Column count of B rounded down to a multiple of 4 (size_t(-4) masks off
// the two lowest bits).
556 const size_t jend( B.columns() & size_t(-4) );
// Blocks of four columns.
559 for(
size_t j=0UL; j<jend; j+=4UL ) {
560 for(
size_t i=0UL; i<A.rows(); ++i )
562 ConstIterator element( A.begin(i) );
563 const ConstIterator end( A.end(i) );
// No stored elements for index i: the affected entries of C are reset.
565 if( element == end ) {
567 reset( (~C)(i,j+1UL) );
568 reset( (~C)(i,j+2UL) );
569 reset( (~C)(i,j+3UL) );
// Otherwise the entries are initialized from one stored element ...
573 (~C)(i,j ) = element->value() * B(element->index(),j );
574 (~C)(i,j+1UL) = element->value() * B(element->index(),j+1UL);
575 (~C)(i,j+2UL) = element->value() * B(element->index(),j+2UL);
576 (~C)(i,j+3UL) = element->value() * B(element->index(),j+3UL);
// ... and the remaining stored elements are accumulated.
578 for( ; element!=end; ++element ) {
579 (~C)(i,j ) += element->value() * B(element->index(),j );
580 (~C)(i,j+1UL) += element->value() * B(element->index(),j+1UL);
581 (~C)(i,j+2UL) += element->value() * B(element->index(),j+2UL);
582 (~C)(i,j+3UL) += element->value() * B(element->index(),j+3UL);
// Leftover columns (fewer than four), same scheme one column at a time.
587 for(
size_t j=jend; j<B.columns(); ++j ) {
588 for(
size_t i=0UL; i<A.rows(); ++i )
590 ConstIterator element( A.begin(i) );
591 const ConstIterator end( A.end(i) );
593 if( element == end ) {
598 (~C)(i,j) = element->value() * B(element->index(),j);
600 for( ; element!=end; ++element )
601 (~C)(i,j) += element->value() * B(element->index(),j);
// Assignment path that goes through a temporary.
620 template<
typename MT
// The temporary's type is chosen between OppositeType and ResultType based on SO
// (presumably OppositeType when SO is true -- confirm against SelectType).
626 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// Construct the temporary from the expression rhs.
638 const TmpType tmp( rhs );
// Addition-assignment entry point: forwards the target (~lhs) and the operands
// A and B to the selectAddAssignKernel overload enabled for their types.
656 template<
typename MT
675 SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Addition-assignment kernel enabled when UseDefaultKernel holds for
// (MT3,MT4,MT5). Every entry C(i,j) is updated separately by adding
// value(A) * B(index(A), j) for each stored element of A at index i.
694 template<
typename MT3
697 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
698 selectAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
700 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
702 for(
size_t i=0UL; i<A.rows(); ++i )
704 const ConstIterator end( A.end(i) );
706 for(
size_t j=0UL; j<B.columns(); ++j )
// Restart at the first stored element for each column j.
708 ConstIterator element( A.begin(i) );
710 for( ; element!=end; ++element ) {
711 (~C)(i,j) += element->value() * B(element->index(),j);
// Addition-assignment kernel enabled when UseOptimizedKernel holds for
// (MT3,MT4,MT5). Stored elements of A are consumed four at a time and the
// columns of B are processed four at a time, with remainder loops for both.
733 template<
typename MT3
736 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
737 selectAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
739 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// size_t(-4) has all bits set except the two lowest, so the AND rounds the
// column count down to a multiple of 4.
741 const size_t jend( B.columns() & size_t(-4) );
744 for(
size_t i=0UL; i<A.rows(); ++i )
746 const ConstIterator end( A.end(i) );
747 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
749 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: capture index and value of four stored elements.
751 for(
size_t k=0UL; k<kend; k+=4UL ) {
752 const size_t i1( element->index() );
753 const ET1 v1( element->value() );
755 const size_t i2( element->index() );
756 const ET1 v2( element->value() );
758 const size_t i3( element->index() );
759 const ET1 v3( element->value() );
761 const size_t i4( element->index() );
762 const ET1 v4( element->value() );
// Add the contribution of the four elements to four columns of C per step.
765 for(
size_t j=0UL; j<jend; j+=4UL ) {
766 (~C)(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
767 (~C)(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
768 (~C)(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
769 (~C)(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
// Remaining columns (fewer than four).
771 for(
size_t j=jend; j<B.columns(); ++j ) {
772 (~C)(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
// Remaining stored elements (fewer than four), handled one at a time.
776 for( ; element!=end; ++element ) {
777 for(
size_t j=0UL; j<jend; j+=4UL ) {
778 (~C)(i,j ) += element->value() * B(element->index(),j );
779 (~C)(i,j+1UL) += element->value() * B(element->index(),j+1UL);
780 (~C)(i,j+2UL) += element->value() * B(element->index(),j+2UL);
781 (~C)(i,j+3UL) += element->value() * B(element->index(),j+3UL);
783 for(
size_t j=jend; j<B.columns(); ++j ) {
784 (~C)(i,j) += element->value() * B(element->index(),j);
// Addition-assignment kernel enabled when UseVectorizedKernel holds for
// (MT3,MT4,MT5). C is updated via load/store and B is read via B.get(),
// advancing the column index by IT::size per step.
// NOTE(review): v1..v4 are not declared in the visible lines; presumably they
// hold the values of the current stored elements in a form that can be
// multiplied with B.get() -- confirm.
// NOTE(review): the j loops have no scalar remainder handling; presumably the
// storage of C and B is padded to a multiple of IT::size -- confirm.
806 template<
typename MT3
809 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
810 selectAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
812 typedef IntrinsicTrait<ElementType> IT;
813 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
815 const size_t N( B.columns() );
817 for(
size_t i=0UL; i<A.rows(); ++i )
819 const ConstIterator end( A.end(i) );
820 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
822 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: four stored elements per iteration.
824 for(
size_t k=0UL; k<kend; k+=4UL ) {
825 const size_t i1( element->index() );
828 const size_t i2( element->index() );
831 const size_t i3( element->index() );
834 const size_t i4( element->index() );
// Read-modify-write of C(i, j...) adding the four contributions.
838 for(
size_t j=0UL; j<N; j+=IT::size ) {
839 store( &(~C)(i,j),
load( &(~C)(i,j) ) + v1 * B.get(i1,j) + v2 * B.get(i2,j) + v3 * B.get(i3,j) + v4 * B.get(i4,j) );
// Remaining stored elements, one at a time.
843 for( ; element!=end; ++element ) {
844 const size_t i1( element->index() );
847 for(
size_t j=0UL; j<N; j+=IT::size ) {
848 store( &(~C)(i,j),
load( &(~C)(i,j) ) + v1 * B.get(i1,j) );
// Addition-assignment kernel for a target of type DenseMatrix<MT3,true>
// (presumably the column-major case -- confirm).
// Columns are traversed in the outer loop, four at a time, followed by a loop
// over the leftover columns.
870 template<
typename MT3
874 selectAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
876 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// Column count of B rounded down to a multiple of 4.
878 const size_t jend( B.columns() & size_t(-4) );
// Blocks of four columns: every stored element of A at index i contributes
// to the four entries C(i,j..j+3).
881 for(
size_t j=0UL; j<jend; j+=4UL ) {
882 for(
size_t i=0UL; i<A.rows(); ++i )
884 ConstIterator element( A.begin(i) );
885 const ConstIterator end( A.end(i) );
887 for( ; element!=end; ++element ) {
888 (~C)(i,j ) += element->value() * B(element->index(),j );
889 (~C)(i,j+1UL) += element->value() * B(element->index(),j+1UL);
890 (~C)(i,j+2UL) += element->value() * B(element->index(),j+2UL);
891 (~C)(i,j+3UL) += element->value() * B(element->index(),j+3UL);
// Leftover columns (fewer than four), one column at a time.
896 for(
size_t j=jend; j<B.columns(); ++j ) {
897 for(
size_t i=0UL; i<A.rows(); ++i )
899 ConstIterator element( A.begin(i) );
900 const ConstIterator end( A.end(i) );
902 for( ; element!=end; ++element )
903 (~C)(i,j) += element->value() * B(element->index(),j);
// Subtraction-assignment entry point: forwards the target (~lhs) and the
// operands A and B to the selectSubAssignKernel overload enabled for their types.
926 template<
typename MT
945 SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Subtraction-assignment kernel enabled when UseDefaultKernel holds for
// (MT3,MT4,MT5). Every entry C(i,j) is updated separately by subtracting
// value(A) * B(index(A), j) for each stored element of A at index i.
964 template<
typename MT3
967 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
968 selectSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
970 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
972 for(
size_t i=0UL; i<A.rows(); ++i )
974 const ConstIterator end( A.end(i) );
976 for(
size_t j=0UL; j<B.columns(); ++j )
// Restart at the first stored element for each column j.
978 ConstIterator element( A.begin(i) );
980 for( ; element!=end; ++element ) {
981 (~C)(i,j) -= element->value() * B(element->index(),j);
// Subtraction-assignment kernel enabled when UseOptimizedKernel holds for
// (MT3,MT4,MT5). Stored elements of A are consumed four at a time and the
// columns of B are processed four at a time, with remainder loops for both.
1003 template<
typename MT3
1006 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1007 selectSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1009 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// Column count rounded down to a multiple of 4; the assertion checks the
// bit-mask result against the equivalent modulo computation.
1011 const size_t jend( B.columns() & size_t(-4) );
1012 BLAZE_INTERNAL_ASSERT( ( B.columns() - ( B.columns() % 4UL ) ) == jend,
"Invalid end calculation" );
1014 for(
size_t i=0UL; i<A.rows(); ++i )
1016 const ConstIterator end( A.end(i) );
1017 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
1019 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: capture index and value of four stored elements.
1021 for(
size_t k=0UL; k<kend; k+=4UL ) {
1022 const size_t i1( element->index() );
1023 const ET1 v1( element->value() );
1025 const size_t i2( element->index() );
1026 const ET1 v2( element->value() );
1028 const size_t i3( element->index() );
1029 const ET1 v3( element->value() );
1031 const size_t i4( element->index() );
1032 const ET1 v4( element->value() );
// Subtract the combined contribution of the four elements from four columns
// of C per step.
1035 for(
size_t j=0UL; j<jend; j+=4UL ) {
1036 (~C)(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1037 (~C)(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1038 (~C)(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1039 (~C)(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
// Remaining columns (fewer than four).
1041 for(
size_t j=jend; j<B.columns(); ++j ) {
1042 (~C)(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
// Remaining stored elements (fewer than four), handled one at a time.
1046 for( ; element!=end; ++element ) {
1047 for(
size_t j=0UL; j<jend; j+=4UL ) {
1048 (~C)(i,j ) -= element->value() * B(element->index(),j );
1049 (~C)(i,j+1UL) -= element->value() * B(element->index(),j+1UL);
1050 (~C)(i,j+2UL) -= element->value() * B(element->index(),j+2UL);
1051 (~C)(i,j+3UL) -= element->value() * B(element->index(),j+3UL);
1053 for(
size_t j=jend; j<B.columns(); ++j ) {
1054 (~C)(i,j) -= element->value() * B(element->index(),j);
// Subtraction-assignment kernel enabled when UseVectorizedKernel holds for
// (MT3,MT4,MT5). C is updated via load/store and B is read via B.get(),
// advancing the column index by IT::size per step.
// NOTE(review): v1..v4 are not declared in the visible lines; presumably they
// hold the values of the current stored elements in a form that can be
// multiplied with B.get() -- confirm.
// NOTE(review): the j loops have no scalar remainder handling; presumably the
// storage of C and B is padded to a multiple of IT::size -- confirm.
1076 template<
typename MT3
1079 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1080 selectSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1082 typedef IntrinsicTrait<ElementType> IT;
1083 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
1085 const size_t N( B.columns() );
1087 for(
size_t i=0UL; i<A.rows(); ++i )
1089 const ConstIterator end( A.end(i) );
1090 ConstIterator element( A.begin(i) );
// Number of stored elements for index i, rounded down to a multiple of 4.
1092 const size_t kend( A.nonZeros(i) & size_t(-4) );
// Unrolled part: four stored elements per iteration.
1094 for(
size_t k=0UL; k<kend; k+=4UL ) {
1095 const size_t i1( element->index() );
1098 const size_t i2( element->index() );
1101 const size_t i3( element->index() );
1104 const size_t i4( element->index() );
// Read-modify-write of C(i, j...) subtracting the four contributions.
1108 for(
size_t j=0UL; j<N; j+=IT::size ) {
1109 store( &(~C)(i,j),
load( &(~C)(i,j) ) - v1 * B.get(i1,j) - v2 * B.get(i2,j) - v3 * B.get(i3,j) - v4 * B.get(i4,j) );
// Remaining stored elements, one at a time.
1113 for( ; element!=end; ++element ) {
1114 const size_t i1( element->index() );
1117 for(
size_t j=0UL; j<N; j+=IT::size ) {
1118 store( &(~C)(i,j),
load( &(~C)(i,j) ) - v1 * B.get(i1,j) );
// Subtraction-assignment kernel for a target of type DenseMatrix<MT3,true>
// (presumably the column-major case -- confirm).
// Columns are traversed in the outer loop, four at a time, followed by a loop
// over the leftover columns.
1140 template<
typename MT3
1144 selectSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1146 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
// Column count rounded down to a multiple of 4; the assertion checks the
// bit-mask result against the equivalent modulo computation.
1148 const size_t jend( B.columns() & size_t(-4) );
1149 BLAZE_INTERNAL_ASSERT( ( B.columns() - ( B.columns() % 4UL ) ) == jend,
"Invalid end calculation" );
// Blocks of four columns: every stored element of A at index i is subtracted
// from the four entries C(i,j..j+3).
1151 for(
size_t j=0UL; j<jend; j+=4UL ) {
1152 for(
size_t i=0UL; i<A.rows(); ++i )
1154 ConstIterator element( A.begin(i) );
1155 const ConstIterator end( A.end(i) );
1157 for( ; element!=end; ++element ) {
1158 (~C)(i,j ) -= element->value() * B(element->index(),j );
1159 (~C)(i,j+1UL) -= element->value() * B(element->index(),j+1UL);
1160 (~C)(i,j+2UL) -= element->value() * B(element->index(),j+2UL);
1161 (~C)(i,j+3UL) -= element->value() * B(element->index(),j+3UL);
// Leftover columns (fewer than four), one column at a time.
1166 for(
size_t j=jend; j<B.columns(); ++j ) {
1167 for(
size_t i=0UL; i<A.rows(); ++i )
1169 ConstIterator element( A.begin(i) );
1170 const ConstIterator end( A.end(i) );
1172 for( ; element!=end; ++element )
1173 (~C)(i,j) -= element->value() * B(element->index(),j);
// Free function that returns a SMatDMatMultExpr<T1,T2> by const value.
// It throws std::invalid_argument with the message below when the operand
// sizes are rejected (the checked condition is not part of the visible lines).
1241 template<
typename T1
1243 inline const SMatDMatMultExpr<T1,T2>
1249 throw std::invalid_argument(
"Matrix sizes do not match" );
// Trait specialization for (MT1*MT2) * VT with a dense vector VT.
// When MT1 is a row-major sparse matrix, MT2 a row-major dense matrix and VT a
// non-transpose dense vector, the selected type regroups the product as
// MT1 * (MT2 * VT); the alternative passed to SelectType is INVALID_TYPE.
1266 template<
typename MT1,
typename MT2,
typename VT >
1267 struct DMatDVecMultExprTrait< SMatDMatMultExpr<MT1,MT2>, VT >
1271 typedef typename SelectType< IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1272 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
1273 IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
1274 ,
typename SMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
1275 , INVALID_TYPE >::Type Type;
// Trait specialization for (MT1*MT2) * VT with a sparse vector VT.
// When MT1 is a row-major sparse matrix, MT2 a row-major dense matrix and VT a
// non-transpose sparse vector, the selected type regroups the product as
// MT1 * (MT2 * VT); the alternative passed to SelectType is INVALID_TYPE.
1284 template<
typename MT1,
typename MT2,
typename VT >
1285 struct DMatSVecMultExprTrait< SMatDMatMultExpr<MT1,MT2>, VT >
1289 typedef typename SelectType< IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1290 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
1291 IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
1292 ,
typename SMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
1293 , INVALID_TYPE >::Type Type;
// Trait specialization for VT * (MT1*MT2) with a dense transpose vector VT.
// When VT is a dense transpose vector, MT1 a row-major sparse matrix and MT2 a
// row-major dense matrix, the selected type regroups the product as
// (VT * MT1) * MT2; the alternative passed to SelectType is INVALID_TYPE.
1302 template<
typename VT,
typename MT1,
typename MT2 >
1303 struct TDVecDMatMultExprTrait< VT, SMatDMatMultExpr<MT1,MT2> >
1307 typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
1308 IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1309 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
1310 ,
typename TDVecDMatMultExprTrait< typename TDVecSMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1311 , INVALID_TYPE >::Type Type;
// Trait specialization for VT * (MT1*MT2) with a sparse transpose vector VT.
// When VT is a sparse transpose vector, MT1 a row-major sparse matrix and MT2 a
// row-major dense matrix, the selected type regroups the product as
// (VT * MT1) * MT2; the alternative passed to SelectType is INVALID_TYPE.
1320 template<
typename VT,
typename MT1,
typename MT2 >
1321 struct TSVecDMatMultExprTrait< VT, SMatDMatMultExpr<MT1,MT2> >
1325 typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
1326 IsSparseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
1327 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
1328 ,
typename TSVecDMatMultExprTrait< typename TSVecSMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1329 , INVALID_TYPE >::Type Type;
// Trait specialization for taking a row of the product MT1*MT2:
// the resulting type is that of (row of const MT1) multiplied by MT2.
1338 template<
typename MT1,
typename MT2 >
1339 struct RowExprTrait< SMatDMatMultExpr<MT1,MT2> >
1343 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Trait specialization for taking a column of the product MT1*MT2:
// the resulting type is that of MT1 multiplied by (column of const MT2).
1352 template<
typename MT1,
typename MT2 >
1353 struct ColumnExprTrait< SMatDMatMultExpr<MT1,MT2> >
1357 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;