22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
// Expression class for the product of a matrix of type MT1 with a matrix of
// type MT2. It derives publicly from DenseMatrix<..., true> and privately from
// MatMatMultExpr.
// NOTE(review): the `true` flag is presumably the column-major storage-order
// marker -- confirm against the DenseMatrix declaration.
88 template<
typename MT1
90 class TDMatTSMatMultExpr :
public DenseMatrix< TDMatTSMatMultExpr<MT1,MT2>, true >
91 ,
private MatMatMultExpr
// Shorthand aliases for the result, element and composite types of the
// left-hand (suffix 1) and right-hand (suffix 2) operand.
96 typedef typename MT1::ResultType
RT1;
97 typedef typename MT2::ResultType
RT2;
98 typedef typename MT1::ElementType
ET1;
99 typedef typename MT2::ElementType
ET2;
100 typedef typename MT1::CompositeType
CT1;
101 typedef typename MT2::CompositeType
CT2;
// Compile-time predicate over three types that selects the vectorized kernels.
// `value` requires both T1 and T2 to be vectorizable; the remaining conditions
// of the expression are not visible in this excerpt.
109 template<
typename T1,
typename T2,
typename T3 >
110 struct UseVectorizedKernel {
111 enum { value = T1::vectorizable && T2::vectorizable &&
// Compile-time predicate that selects the optimized (non-vectorized) kernels.
// `value` is false whenever UseVectorizedKernel applies; the remaining
// conditions of the expression are not visible in this excerpt.
128 template<
typename T1,
typename T2,
typename T3 >
129 struct UseOptimizedKernel {
130 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
// Compile-time predicate that selects the default kernels: `value` is true
// exactly when neither the vectorized nor the optimized kernel is selected.
142 template<
typename T1,
typename T2,
typename T3 >
143 struct UseDefaultKernel {
144 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
145 !UseOptimizedKernel<T1,T2,T3>::value };
// Compile-time flag of the expression itself: it is declared as not vectorizable.
176 enum { vectorizable = 0 };
// Fragment of the element-access computation (signature and surrounding
// statements are not visible in this excerpt).
// Special case: the left-hand operand has no columns.
209 if(
lhs_.columns() == 0UL )
// Iterator range over column j of B (B is declared outside this excerpt).
217 const ConstIterator end( B.end(j) );
218 ConstIterator element( B.begin(j) );
// The first product initialises tmp ...
225 tmp =
lhs_(i,element->index()) * element->value();
// ... and the products of the remaining elements are added to it.
227 for( ; element!=end; ++element )
228 tmp +=
lhs_(i,element->index()) * element->value();
// Alternative path that walks the columns of lhs_ by index, starting at 1
// (the loop body is not visible in this excerpt).
234 for(
size_t k=1UL; k<
lhs_.columns(); ++k ) {
// Returns the column count of the right-hand operand.
259 return rhs_.columns();
// Two alias checks with identical bodies (their names and signatures are not
// visible in this excerpt). Each reports true if either operand reports
// aliasing with the given `alias` argument.
289 template<
typename T >
291 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Second alias check; same test as above.
301 template<
typename T >
303 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Assignment entry point (signature not visible in this excerpt): forwards the
// target (~lhs) and the operands A and B to the selectAssignKernel overload set.
326 template<
typename MT
345 TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Assignment kernel whose signature is not visible in this excerpt. Rows of A
// are processed in blocks of four, followed by a scalar loop over the
// remaining rows.
364 template<
typename MT3
// iend is A.rows() rounded down to a multiple of four (the two lowest bits
// are masked off).
372 const size_t iend( A.rows() &
size_t(-4) );
// Blocked part: rows i .. i+3 are handled together for every column j of B.
375 for(
size_t i=0UL; i<iend; i+=4UL ) {
376 for(
size_t j=0UL; j<B.columns(); ++j )
378 ConstIterator element( B.begin(j) );
379 const ConstIterator end( B.end(j) );
// Column j of B holds no elements: the target entries are reset.
381 if( element == end ) {
383 reset( (~C)(i+1UL,j) );
384 reset( (~C)(i+2UL,j) );
385 reset( (~C)(i+3UL,j) );
// The first element of the column initialises the four target entries by
// plain assignment ...
389 (~C)(i ,j) = A(i ,element->index()) * element->value();
390 (~C)(i+1UL,j) = A(i+1UL,element->index()) * element->value();
391 (~C)(i+2UL,j) = A(i+2UL,element->index()) * element->value();
392 (~C)(i+3UL,j) = A(i+3UL,element->index()) * element->value();
// ... and the remaining elements are accumulated on top.
394 for( ; element!=end; ++element ) {
395 (~C)(i ,j) += A(i ,element->index()) * element->value();
396 (~C)(i+1UL,j) += A(i+1UL,element->index()) * element->value();
397 (~C)(i+2UL,j) += A(i+2UL,element->index()) * element->value();
398 (~C)(i+3UL,j) += A(i+3UL,element->index()) * element->value();
// Remainder part: the rows from iend up to A.rows(), one row at a time,
// using the same assign-then-accumulate scheme.
403 for(
size_t i=iend; i<A.rows(); ++i ) {
404 for(
size_t j=0UL; j<B.columns(); ++j )
406 ConstIterator element( B.begin(j) );
407 const ConstIterator end( B.end(j) );
409 if( element == end ) {
414 (~C)(i,j) = A(i,element->index()) * element->value();
416 for( ; element!=end; ++element )
417 (~C)(i,j) += A(i,element->index()) * element->value();
// Default assignment kernel, enabled when UseDefaultKernel<MT3,MT4,MT5> holds.
// C is the target (DenseMatrix<MT3,true>), A and B are the two operands.
438 template<
typename MT3
441 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
442 selectAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
444 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// The target is processed one column j at a time.
446 for(
size_t j=0UL; j<B.columns(); ++j ) {
// First pass over all rows of column j of C (the loop body is not visible in
// this excerpt).
447 for(
size_t i=0UL; i<(~C).
rows(); ++i ) {
// Each element of column j of B contributes A(i,index) * value to every row i
// of column j of C.
450 ConstIterator element( B.begin(j) );
451 const ConstIterator end( B.end(j) );
452 for( ; element!=end; ++element ) {
453 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Either a plain assignment or an accumulation is performed; the condition
// choosing between the two is not visible in this excerpt.
455 (~C)(i,j) = A(i,element->index()) * element->value();
457 (~C)(i,j) += A(i,element->index()) * element->value();
// Optimized assignment kernel, enabled when UseOptimizedKernel<MT3,MT4,MT5>
// holds. For each column j of B it consumes the elements four at a time and
// the rows of A four at a time.
// NOTE(review): only `+=` updates of C are visible here; any initial clearing
// of C is not part of this excerpt -- confirm in the full source.
479 template<
typename MT3
482 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
483 selectAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
485 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// A.rows() rounded down to a multiple of four.
487 const size_t iend( A.rows() & size_t(-4) );
492 for(
size_t j=0UL; j<B.columns(); ++j )
494 const ConstIterator end( B.end(j) );
495 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
497 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part: index/value pairs of four elements are cached in j1..j4 and
// v1..v4. NOTE(review): the iterator advances between these reads are not
// visible in this excerpt.
499 for(
size_t k=0UL; k<kend; k+=4UL ) {
500 const size_t j1( element->index() );
501 const ET2 v1( element->value() );
503 const size_t j2( element->index() );
504 const ET2 v2( element->value() );
506 const size_t j3( element->index() );
507 const ET2 v3( element->value() );
509 const size_t j4( element->index() );
510 const ET2 v4( element->value() );
// Four rows at a time: each target entry receives the sum of four products.
513 for(
size_t i=0UL; i<iend; i+=4UL ) {
514 (~C)(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
515 (~C)(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
516 (~C)(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
517 (~C)(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
// Remaining rows, one at a time.
519 for(
size_t i=iend; i<A.rows(); ++i ) {
520 (~C)(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements of column j are applied one by one, again with the rows
// split into a four-row part and a remainder.
524 for( ; element!=end; ++element ) {
525 for(
size_t i=0UL; i<iend; i+=4UL ) {
526 (~C)(i ,j) += A(i ,element->index()) * element->value();
527 (~C)(i+1UL,j) += A(i+1UL,element->index()) * element->value();
528 (~C)(i+2UL,j) += A(i+2UL,element->index()) * element->value();
529 (~C)(i+3UL,j) += A(i+3UL,element->index()) * element->value();
531 for(
size_t i=iend; i<A.rows(); ++i ) {
532 (~C)(i,j) += A(i,element->index()) * element->value();
// Vectorized assignment kernel, enabled when UseVectorizedKernel<MT3,MT4,MT5>
// holds. Column j of C is updated in chunks of IT::size rows through
// load()/store() on C and A.get().
// NOTE(review): the row loops step by IT::size up to M without a scalar
// remainder loop, so this presumably relies on padded storage -- confirm.
554 template<
typename MT3
557 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
558 selectAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
560 typedef IntrinsicTrait<ElementType> IT;
561 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Number of rows of A.
563 const size_t M( A.rows() );
567 for(
size_t j=0UL; j<B.columns(); ++j )
569 const ConstIterator end( B.end(j) );
570 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
572 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part. NOTE(review): the definitions of v1..v4 and the iterator
// advances between the index reads are not visible in this excerpt.
574 for(
size_t k=0UL; k<kend; k+=4UL ) {
575 const size_t j1( element->index() );
578 const size_t j2( element->index() );
581 const size_t j3( element->index() );
584 const size_t j4( element->index() );
// Add the four products to the current content of C for one chunk of rows.
588 for(
size_t i=0UL; i<M; i+=IT::size ) {
589 store( &(~C)(i,j),
load( &(~C)(i,j) ) + A.get(i,j1) * v1 + A.get(i,j2) * v2 + A.get(i,j3) * v3 + A.get(i,j4) * v4 );
// Leftover elements of column j are applied one at a time.
593 for( ; element!=end; ++element ) {
594 const size_t j1( element->index() );
597 for(
size_t i=0UL; i<M; i+=IT::size ) {
598 store( &(~C)(i,j),
load( &(~C)(i,j) ) + A.get(i,j1) * v1 );
// Assignment path that first evaluates the expression `rhs` into a temporary
// (signature not visible in this excerpt). TmpType is chosen between
// ResultType and OppositeType depending on the flag SO.
618 template<
typename MT
624 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
636 const TmpType tmp( rhs );
// Addition-assignment entry point (signature not visible in this excerpt):
// forwards the target (~lhs) and the operands A and B to the
// selectAddAssignKernel overload set.
655 template<
typename MT
674 TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Addition-assignment kernel for targets of type DenseMatrix<MT3,false>.
// Rows of A are processed in blocks of four, followed by a scalar loop over
// the remaining rows; every product is added to the existing content of C.
693 template<
typename MT3
697 selectAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
699 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// A.rows() rounded down to a multiple of four.
701 const size_t iend( A.rows() & size_t(-4) );
// Blocked part: rows i .. i+3 together for every column j of B.
704 for(
size_t i=0UL; i<iend; i+=4UL ) {
705 for(
size_t j=0UL; j<B.columns(); ++j )
707 ConstIterator element( B.begin(j) );
708 const ConstIterator end( B.end(j) );
710 for( ; element!=end; ++element ) {
711 (~C)(i ,j) += A(i ,element->index()) * element->value();
712 (~C)(i+1UL,j) += A(i+1UL,element->index()) * element->value();
713 (~C)(i+2UL,j) += A(i+2UL,element->index()) * element->value();
714 (~C)(i+3UL,j) += A(i+3UL,element->index()) * element->value();
// Remainder part: the rows from iend up to A.rows(), one row at a time.
719 for(
size_t i=iend; i<A.rows(); ++i ) {
720 for(
size_t j=0UL; j<B.columns(); ++j )
722 ConstIterator element( B.begin(j) );
723 const ConstIterator end( B.end(j) );
725 for( ; element!=end; ++element )
726 (~C)(i,j) += A(i,element->index()) * element->value();
// Default addition-assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds. C is the target (DenseMatrix<MT3,true>),
// A and B are the two operands.
747 template<
typename MT3
750 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
751 selectAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
753 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Each element of column j of B contributes A(i,index) * value to every row i
// of column j of C.
755 for(
size_t j=0UL; j<B.columns(); ++j ) {
756 ConstIterator element( B.begin(j) );
757 const ConstIterator end( B.end(j) );
758 for( ; element!=end; ++element ) {
759 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Either a plain assignment or an accumulation is performed; the condition
// choosing between the two is not visible in this excerpt.
761 (~C)(i,j) = A(i,element->index()) * element->value();
763 (~C)(i,j) += A(i,element->index()) * element->value();
// Optimized addition-assignment kernel, enabled when
// UseOptimizedKernel<MT3,MT4,MT5> holds. For each column j of B it consumes the
// elements four at a time and the rows of A four at a time, adding every
// product to the existing content of C.
785 template<
typename MT3
788 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
789 selectAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
791 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// A.rows() rounded down to a multiple of four.
793 const size_t iend( A.rows() & size_t(-4) );
796 for(
size_t j=0UL; j<B.columns(); ++j )
798 const ConstIterator end( B.end(j) );
799 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
801 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part: index/value pairs of four elements are cached in j1..j4 and
// v1..v4. NOTE(review): the iterator advances between these reads are not
// visible in this excerpt.
803 for(
size_t k=0UL; k<kend; k+=4UL ) {
804 const size_t j1( element->index() );
805 const ET2 v1( element->value() );
807 const size_t j2( element->index() );
808 const ET2 v2( element->value() );
810 const size_t j3( element->index() );
811 const ET2 v3( element->value() );
813 const size_t j4( element->index() );
814 const ET2 v4( element->value() );
// Four rows at a time: each target entry receives the sum of four products.
817 for(
size_t i=0UL; i<iend; i+=4UL ) {
818 (~C)(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
819 (~C)(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
820 (~C)(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
821 (~C)(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
// Remaining rows, one at a time.
823 for(
size_t i=iend; i<A.rows(); ++i ) {
824 (~C)(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements of column j are applied one by one, again with the rows
// split into a four-row part and a remainder.
828 for( ; element!=end; ++element ) {
829 for(
size_t i=0UL; i<iend; i+=4UL ) {
830 (~C)(i ,j) += A(i ,element->index()) * element->value();
831 (~C)(i+1UL,j) += A(i+1UL,element->index()) * element->value();
832 (~C)(i+2UL,j) += A(i+2UL,element->index()) * element->value();
833 (~C)(i+3UL,j) += A(i+3UL,element->index()) * element->value();
835 for(
size_t i=iend; i<A.rows(); ++i ) {
836 (~C)(i,j) += A(i,element->index()) * element->value();
// Vectorized addition-assignment kernel, enabled when
// UseVectorizedKernel<MT3,MT4,MT5> holds. Column j of C is updated in chunks of
// IT::size rows through load()/store() on C and A.get().
// NOTE(review): the row loops step by IT::size up to M without a scalar
// remainder loop, so this presumably relies on padded storage -- confirm.
858 template<
typename MT3
861 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
862 selectAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
864 typedef IntrinsicTrait<ElementType> IT;
865 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Number of rows of A.
867 const size_t M( A.rows() );
869 for(
size_t j=0UL; j<B.columns(); ++j )
871 const ConstIterator end( B.end(j) );
872 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
874 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part. NOTE(review): the definitions of v1..v4 and the iterator
// advances between the index reads are not visible in this excerpt.
876 for(
size_t k=0UL; k<kend; k+=4UL ) {
877 const size_t j1( element->index() );
880 const size_t j2( element->index() );
883 const size_t j3( element->index() );
886 const size_t j4( element->index() );
// Add the four products to the current content of C for one chunk of rows.
890 for(
size_t i=0UL; i<M; i+=IT::size ) {
891 store( &(~C)(i,j),
load( &(~C)(i,j) ) + A.get(i,j1) * v1 + A.get(i,j2) * v2 + A.get(i,j3) * v3 + A.get(i,j4) * v4 );
// Leftover elements of column j are applied one at a time.
895 for( ; element!=end; ++element ) {
896 const size_t j1( element->index() );
899 for(
size_t i=0UL; i<M; i+=IT::size ) {
900 store( &(~C)(i,j),
load( &(~C)(i,j) ) + A.get(i,j1) * v1 );
// Subtraction-assignment entry point (signature not visible in this excerpt):
// forwards the target (~lhs) and the operands A and B to the
// selectSubAssignKernel overload set.
925 template<
typename MT
944 TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Subtraction-assignment kernel for targets of type DenseMatrix<MT3,false>.
// Rows of A are processed in blocks of four, followed by a scalar loop over
// the remaining rows; every product is subtracted from the existing content
// of C.
963 template<
typename MT3
967 selectSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
969 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// A.rows() rounded down to a multiple of four.
971 const size_t iend( A.rows() & size_t(-4) );
// Blocked part: rows i .. i+3 together for every column j of B.
974 for(
size_t i=0UL; i<iend; i+=4UL ) {
975 for(
size_t j=0UL; j<B.columns(); ++j )
977 ConstIterator element( B.begin(j) );
978 const ConstIterator end( B.end(j) );
980 for( ; element!=end; ++element ) {
981 (~C)(i ,j) -= A(i ,element->index()) * element->value();
982 (~C)(i+1UL,j) -= A(i+1UL,element->index()) * element->value();
983 (~C)(i+2UL,j) -= A(i+2UL,element->index()) * element->value();
984 (~C)(i+3UL,j) -= A(i+3UL,element->index()) * element->value();
// Remainder part: the rows from iend up to A.rows(), one row at a time.
989 for(
size_t i=iend; i<A.rows(); ++i ) {
990 for(
size_t j=0UL; j<B.columns(); ++j )
992 ConstIterator element( B.begin(j) );
993 const ConstIterator end( B.end(j) );
995 for( ; element!=end; ++element )
996 (~C)(i,j) -= A(i,element->index()) * element->value();
// Default subtraction-assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds. C is the target (DenseMatrix<MT3,true>),
// A and B are the two operands.
1017 template<
typename MT3
1020 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1021 selectSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1023 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Each element of column j of B removes A(i,index) * value from every row i of
// column j of C.
1025 for(
size_t j=0UL; j<B.columns(); ++j ) {
1026 ConstIterator element( B.begin(j) );
1027 const ConstIterator end( B.end(j) );
1028 for( ; element!=end; ++element ) {
1029 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Either the negated product is assigned or the product is subtracted; the
// condition choosing between the two is not visible in this excerpt.
1031 (~C)(i,j) = -A(i,element->index()) * element->value();
1033 (~C)(i,j) -= A(i,element->index()) * element->value();
// Optimized subtraction-assignment kernel, enabled when
// UseOptimizedKernel<MT3,MT4,MT5> holds. For each column j of B it consumes the
// elements four at a time and the rows of A four at a time, subtracting every
// product from the existing content of C.
1055 template<
typename MT3
1058 static inline typename EnableIf< UseOptimizedKernel<MT3,MT4,MT5> >::Type
1059 selectSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1061 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// A.rows() rounded down to a multiple of four.
1063 const size_t iend( A.rows() & size_t(-4) );
1066 for(
size_t j=0UL; j<B.columns(); ++j )
1068 const ConstIterator end( B.end(j) );
1069 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
1071 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part: index/value pairs of four elements are cached in j1..j4 and
// v1..v4. NOTE(review): the iterator advances between these reads are not
// visible in this excerpt.
1073 for(
size_t k=0UL; k<kend; k+=4UL ) {
1074 const size_t j1( element->index() );
1075 const ET2 v1( element->value() );
1077 const size_t j2( element->index() );
1078 const ET2 v2( element->value() );
1080 const size_t j3( element->index() );
1081 const ET2 v3( element->value() );
1083 const size_t j4( element->index() );
1084 const ET2 v4( element->value() );
// Four rows at a time: the sum of four products is subtracted from each entry.
1087 for(
size_t i=0UL; i<iend; i+=4UL ) {
1088 (~C)(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1089 (~C)(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1090 (~C)(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1091 (~C)(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
// Remaining rows, one at a time.
1093 for(
size_t i=iend; i<A.rows(); ++i ) {
1094 (~C)(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements of column j are applied one by one, again with the rows
// split into a four-row part and a remainder.
1098 for( ; element!=end; ++element ) {
1099 for(
size_t i=0UL; i<iend; i+=4UL ) {
1100 (~C)(i ,j) -= A(i ,element->index()) * element->value();
1101 (~C)(i+1UL,j) -= A(i+1UL,element->index()) * element->value();
1102 (~C)(i+2UL,j) -= A(i+2UL,element->index()) * element->value();
1103 (~C)(i+3UL,j) -= A(i+3UL,element->index()) * element->value();
1105 for(
size_t i=iend; i<A.rows(); ++i ) {
1106 (~C)(i,j) -= A(i,element->index()) * element->value();
// Vectorized subtraction-assignment kernel, enabled when
// UseVectorizedKernel<MT3,MT4,MT5> holds. Column j of C is updated in chunks of
// IT::size rows through load()/store() on C and A.get().
// NOTE(review): the row loops step by IT::size up to M without a scalar
// remainder loop, so this presumably relies on padded storage -- confirm.
1128 template<
typename MT3
1131 static inline typename EnableIf< UseVectorizedKernel<MT3,MT4,MT5> >::Type
1132 selectSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1134 typedef IntrinsicTrait<ElementType> IT;
1135 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Number of rows of A.
1137 const size_t M( A.rows() );
1139 for(
size_t j=0UL; j<B.columns(); ++j )
1141 const ConstIterator end( B.end(j) );
1142 ConstIterator element( B.begin(j) );
// Number of elements of column j that can be consumed in groups of four.
1144 const size_t kend( B.nonZeros(j) & size_t(-4) );
// Grouped part. NOTE(review): the definitions of v1..v4 and the iterator
// advances between the index reads are not visible in this excerpt.
1146 for(
size_t k=0UL; k<kend; k+=4UL ) {
1147 const size_t j1( element->index() );
1150 const size_t j2( element->index() );
1153 const size_t j3( element->index() );
1156 const size_t j4( element->index() );
// Subtract the four products from the current content of C for one chunk of rows.
1160 for(
size_t i=0UL; i<M; i+=IT::size ) {
1161 store( &(~C)(i,j),
load( &(~C)(i,j) ) - A.get(i,j1) * v1 - A.get(i,j2) * v2 - A.get(i,j3) * v3 - A.get(i,j4) * v4 );
// Leftover elements of column j are applied one at a time.
1165 for( ; element!=end; ++element ) {
1166 const size_t j1( element->index() );
1169 for(
size_t i=0UL; i<M; i+=IT::size ) {
1170 store( &(~C)(i,j),
load( &(~C)(i,j) ) - A.get(i,j1) * v1 );
// Function returning a TDMatTSMatMultExpr<T1,T2> (its name and parameters are
// not visible in this excerpt). It throws std::invalid_argument with the
// message "Matrix sizes do not match"; the size check guarding the throw is
// not visible here.
1239 template<
typename T1
1241 inline const TDMatTSMatMultExpr<T1,T2>
1247 throw std::invalid_argument(
"Matrix sizes do not match" );
// Result-type trait for multiplying a TDMatTSMatMultExpr<MT1,MT2> with a
// vector VT. If MT1 is a column-major dense matrix, MT2 a column-major sparse
// matrix and VT a non-transpose dense vector, Type is the type of
// MT1 * ( MT2 * VT ); otherwise Type is INVALID_TYPE.
1264 template<
typename MT1,
typename MT2,
typename VT >
1265 struct TDMatDVecMultExprTrait< TDMatTSMatMultExpr<MT1,MT2>, VT >
1269 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1270 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
1271 IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
1272 ,
typename TDMatDVecMultExprTrait< MT1, typename TSMatDVecMultExprTrait<MT2,VT>::Type >::Type
1273 , INVALID_TYPE >::Type Type;
// Result-type trait for multiplying a TDMatTSMatMultExpr<MT1,MT2> with a
// vector VT. The selection requires MT1 to be a column-major dense matrix, MT2
// a column-major sparse matrix and VT a non-transpose sparse vector; otherwise
// Type is INVALID_TYPE.
// NOTE(review): although the condition requires a *sparse* vector, the selected
// type is built from the *dense*-vector traits TSMatDVecMultExprTrait and
// TDMatDVecMultExprTrait. This looks like a copy-paste from the dense-vector
// specialization; the sparse-vector traits (TSMatSVecMultExprTrait inside,
// TDMatSVecMultExprTrait outside) are presumably intended -- confirm and make
// sure the corresponding trait header is included before changing it.
1282 template<
typename MT1,
typename MT2,
typename VT >
1283 struct TDMatSVecMultExprTrait< TDMatTSMatMultExpr<MT1,MT2>, VT >
1287 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1288 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
1289 IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
1290 ,
typename TDMatDVecMultExprTrait< MT1, typename TSMatDVecMultExprTrait<MT2,VT>::Type >::Type
1291 , INVALID_TYPE >::Type Type;
// Result-type trait for multiplying a vector VT with a
// TDMatTSMatMultExpr<MT1,MT2>. If VT is a transpose dense vector, MT1 a
// column-major dense matrix and MT2 a column-major sparse matrix, Type is the
// type of ( VT * MT1 ) * MT2; otherwise Type is INVALID_TYPE.
1300 template<
typename VT,
typename MT1,
typename MT2 >
1301 struct TDVecTDMatMultExprTrait< VT, TDMatTSMatMultExpr<MT1,MT2> >
1305 typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
1306 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1307 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
1308 ,
typename TDVecTSMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1309 , INVALID_TYPE >::Type Type;
// Result-type trait for multiplying a vector VT with a
// TDMatTSMatMultExpr<MT1,MT2>. If VT is a transpose sparse vector, MT1 a
// column-major dense matrix and MT2 a column-major sparse matrix, Type is the
// type of ( VT * MT1 ) * MT2, where the inner product type comes from
// TSVecTDMatMultExprTrait and the outer one from TDVecTSMatMultExprTrait;
// otherwise Type is INVALID_TYPE.
1318 template<
typename VT,
typename MT1,
typename MT2 >
1319 struct TSVecTDMatMultExprTrait< VT, TDMatTSMatMultExpr<MT1,MT2> >
1323 typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
1324 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
1325 IsSparseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
1326 ,
typename TDVecTSMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
1327 , INVALID_TYPE >::Type Type;
// Row-view trait for a TDMatTSMatMultExpr<MT1,MT2>: Type is the result of
// multiplying the row type of (const) MT1 with MT2, as given by MultExprTrait.
1336 template<
typename MT1,
typename MT2 >
1337 struct RowExprTrait< TDMatTSMatMultExpr<MT1,MT2> >
1341 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Column-view trait for a TDMatTSMatMultExpr<MT1,MT2>: Type is the result of
// multiplying MT1 with the column type of (const) MT2, as given by
// MultExprTrait.
1350 template<
typename MT1,
typename MT2 >
1351 struct ColumnExprTrait< TDMatTSMatMultExpr<MT1,MT2> >
1355 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;