22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATSVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATSVECMULTEXPR_H_
// Expression class declaration. As visible here it inherits publicly from DenseVector
// (instantiated with this class itself and 'false') and privately from MatVecMultExpr.
75 class TDMatSVecMultExpr :
public DenseVector< TDMatSVecMultExpr<MT,VT>, false >
76 ,
private MatVecMultExpr
// MRT: the ResultType nested in MT (presumably the matrix operand type).
81 typedef typename MT::ResultType
MRT;
// VRT: the ResultType nested in VT (presumably the vector operand type).
82 typedef typename VT::ResultType
VRT;
// MET: element type of MRT.
83 typedef typename MRT::ElementType
MET;
// VET: element type of VRT; used below for the cached element values in the kernels.
84 typedef typename VRT::ElementType
VET;
// MCT: the CompositeType nested in MT.
85 typedef typename MT::CompositeType
MCT;
// VCT: the CompositeType nested in VT.
86 typedef typename VT::CompositeType
VCT;
// Compile-time predicate over three types, used with EnableIf to select the vectorized
// kernel overloads. The visible part of the condition requires both T1 and T2 to report
// 'vectorizable'; the remainder of the expression is outside this excerpt.
101 template<
typename T1,
typename T2,
typename T3 >
102 struct UseVectorizedKernel {
103 enum { value = T1::vectorizable && T2::vectorizable &&
// Compile-time predicate used with EnableIf to select the optimized kernel overloads.
// 'value' is true when the vectorized kernel is not selected for (T1,T2,T3) and neither
// T1's element type nor VET is resizable according to IsResizable.
118 template<
typename T1,
typename T2,
typename T3 >
119 struct UseOptimizedKernel {
120 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
121 !IsResizable<typename T1::ElementType>::value &&
122 !IsResizable<VET>::value };
// Compile-time predicate used with EnableIf to select the default kernel overloads.
// 'value' is true exactly when neither the vectorized nor the optimized predicate holds,
// so the three predicates are mutually exclusive.
132 template<
typename T1,
typename T2,
typename T3 >
133 struct UseDefaultKernel {
134 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
135 !UseOptimizedKernel<T1,T2,T3>::value };
// Compile-time flag of this expression type; fixed to 0 here.
165 enum { vectorizable = 0 };
// Fragment of an element-access computation (the enclosing signature is not visible in
// this excerpt). It visits only the elements reachable through x's iterators and combines
// each with the matrix entry mat_( index, element->index() ).
197 ConstIterator element( x.begin() );
// If x has at least one element, the first product initializes 'res' ...
200 if( element != x.end() ) {
201 res =
mat_( index, element->index() ) * element->value();
// ... and the products of the following elements are accumulated onto it.
// NOTE(review): the statement that steps past the first element is not visible here.
203 for( ; element!=x.end(); ++element ) {
204 res +=
mat_( index, element->index() ) * element->value();
// Alias check fragment (function name not visible in this excerpt): reports true if
// either operand, mat_ or vec_, reports being aliased with 'alias'.
251 template<
typename T >
253 return mat_.isAliased( alias ) ||
vec_.isAliased( alias );
// Second alias-related fragment with the identical check on both operands; its function
// name is likewise outside this excerpt.
263 template<
typename T >
265 return mat_.isAliased( alias ) ||
vec_.isAliased( alias );
// Fragment of the assignment entry point that dispatches to selectAssignKernel
// (the function signature itself is not visible in this excerpt).
287 template<
typename VT1 >
// Special case: x has no non-zero elements (the branch body is not visible here).
296 if( x.nonZeros() == 0UL ) {
// Forward the target (~lhs) and the operands A and x to the kernel overload set; the
// matching overload is chosen at compile time via the EnableIf conditions on the kernels.
311 TDMatSVecMultExpr::selectAssignKernel( ~lhs, A, x );
// Assignment kernel that processes one non-zero element of x at a time.
// y receives the result, A and x are the two operands.
// NOTE(review): the EnableIf condition of this overload is not visible in this excerpt.
330 template<
typename VT1
334 selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// M: number of rows of A, i.e. the number of entries of y written below.
340 const size_t M( A.rows() );
342 ConstIterator element( x.begin() );
343 const ConstIterator end( x.end() );
// First element: plain assignment, which overwrites every y[i].
// NOTE(review): 'element' is dereferenced without a visible emptiness check in this
// kernel; the dispatcher fragment tests x.nonZeros() == 0UL beforehand - confirm it returns.
345 for(
size_t i=0UL; i<M; ++i ) {
346 y[i] = A(i,element->index()) * element->value();
// Remaining elements: accumulate their contribution onto y.
// NOTE(review): the statement advancing past the first element is not visible here.
351 for( ; element!=end; ++element ) {
352 for(
size_t i=0UL; i<M; ++i ) {
353 y[i] += A(i,element->index()) * element->value();
// Assignment kernel enabled when UseOptimizedKernel<VT1,MT1,VT2> holds.
// Handles the non-zero elements of x in groups of four, then the leftovers one by one.
374 template<
typename VT1
377 static inline typename EnableIf< UseOptimizedKernel<VT1,MT1,VT2> >::Type
378 selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
380 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// M: number of rows of A, i.e. the number of entries of y written below.
384 const size_t M( A.rows() );
386 ConstIterator element( x.begin() );
387 const ConstIterator end( x.end() );
// jend: x.nonZeros() with the two lowest bits cleared, i.e. rounded down to a multiple
// of four (the assertion below checks exactly this).
389 const size_t jend( x.nonZeros() & size_t(-4) );
390 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// First group of four: cache index and value of each element.
// NOTE(review): the iterator advances between these reads (and the condition guarding
// this group) are not visible in this excerpt.
394 const size_t j1( element->index() );
395 const VET v1( element->value() );
397 const size_t j2( element->index() );
398 const VET v2( element->value() );
400 const size_t j3( element->index() );
401 const VET v3( element->value() );
403 const size_t j4( element->index() );
404 const VET v4( element->value() );
// Plain assignment: initializes y with the combined contribution of the four columns.
407 for(
size_t i=0UL; i<M; ++i ) {
408 y[i] = A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Single-element variant: only one index/value pair is read (the loop body that follows
// is not visible in this excerpt).
413 const size_t j1( element->index() );
414 const VET v1( element->value() );
417 for(
size_t i=0UL; i<M; ++i ) {
// Further full groups of four. The counter starts at 4 when a first group of four
// existed (jend > 3); otherwise jend is 0 and the loop condition is false immediately.
422 for(
size_t j=(jend>3UL)?(4UL):(1UL); (j+4UL)<=jend; j+=4UL )
424 const size_t j1( element->index() );
425 const VET v1( element->value() );
427 const size_t j2( element->index() );
428 const VET v2( element->value() );
430 const size_t j3( element->index() );
431 const VET v3( element->value() );
433 const size_t j4( element->index() );
434 const VET v4( element->value() );
// Accumulate the four columns onto the already initialized y.
437 for(
size_t i=0UL; i<M; ++i ) {
438 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements (fewer than four): accumulate one column at a time.
441 for( ; element!=end; ++element )
443 const size_t j1( element->index() );
444 const VET v1( element->value() );
446 for(
size_t i=0UL; i<M; ++i ) {
447 y[i] += A(i,j1) * v1;
// Assignment kernel enabled when UseVectorizedKernel<VT1,MT1,VT2> holds.
// Same grouping scheme as the optimized kernel (groups of four elements, then leftovers),
// but y is written through store()/load() in steps of IT::size and A is read via A.get().
468 template<
typename VT1
471 static inline typename EnableIf< UseVectorizedKernel<VT1,MT1,VT2> >::Type
472 selectAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// IT: intrinsic traits of the element type; IT::size is the step width of the i-loops.
474 typedef IntrinsicTrait<ElementType> IT;
475 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
479 const size_t M( A.rows() );
481 ConstIterator element( x.begin() );
482 const ConstIterator end( x.end() );
// jend: x.nonZeros() rounded down to a multiple of four (checked by the assertion).
484 const size_t jend( x.nonZeros() & size_t(-4) );
485 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// First group of four element indices.
// NOTE(review): the definitions of v1..v4 and the iterator advances are not visible here.
489 const size_t j1( element->index() );
492 const size_t j2( element->index() );
495 const size_t j3( element->index() );
498 const size_t j4( element->index() );
// Initialize y blockwise with the combined contribution of the four columns.
// NOTE(review): the loop steps by IT::size with bound i<M; whether y and A are padded so
// that a final partial block is safe cannot be determined from this excerpt - confirm.
502 for(
size_t i=0UL; i<M; i+=IT::size ) {
503 store( &y[i], A.get(i,j1) * v1 + A.get(i,j2) * v2 + A.get(i,j3) * v3 + A.get(i,j4) * v4 );
// Single-element variant: initialize y from one column only.
508 const size_t j1( element->index() );
512 for(
size_t i=0UL; i<M; i+=IT::size ) {
513 store( &y[i], A.get(i,j1) * v1 );
// Further full groups of four; the counter starts at 4 if a first group of four existed,
// otherwise jend is 0 and the loop does not run.
517 for(
size_t j=(jend>3UL)?(4UL):(1UL); (j+4UL)<=jend; j+=4UL )
519 const size_t j1( element->index() );
522 const size_t j2( element->index() );
525 const size_t j3( element->index() );
528 const size_t j4( element->index() );
// Read-modify-write: load the current block of y, add the four columns, store it back.
532 for(
size_t i=0UL; i<M; i+=IT::size ) {
533 store( &y[i],
load( &y[i] ) + A.get(i,j1) * v1 + A.get(i,j2) * v2 + A.get(i,j3) * v3 + A.get(i,j4) * v4 );
// Leftover elements: add one column at a time.
536 for( ; element!=end; ++element )
538 const size_t j1( element->index() );
541 for(
size_t i=0UL; i<M; i+=IT::size ) {
542 store( &y[i],
load( &y[i] ) + A.get(i,j1) * v1 );
// Template head of a definition whose body is not visible in this excerpt.
561 template<
typename VT1 >
// Fragment of the addition-assignment entry point that dispatches to
// selectAddAssignKernel (the function signature is not visible in this excerpt).
591 template<
typename VT1 >
598 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Nothing to add when x has no non-zero elements.
602 if( x.nonZeros() == 0UL )
return;
// Forward the target (~lhs) and the operands to the kernel overload selected via EnableIf.
614 TDMatSVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
// Addition-assignment kernel enabled when UseDefaultKernel<VT1,MT1,VT2> holds.
// Adds the contribution of every non-zero element of x to y, one element at a time.
633 template<
typename VT1
636 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
637 selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
639 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// M: number of rows of A, i.e. the number of entries of y updated below.
643 const size_t M( A.rows() );
645 ConstIterator element( x.begin() );
646 const ConstIterator end( x.end() );
// Outer loop over the non-zero elements of x, inner loop over the rows of A.
648 for( ; element!=end; ++element ) {
649 for(
size_t i=0UL; i<M; ++i ) {
650 y[i] += A(i,element->index()) * element->value();
// Addition-assignment kernel enabled when UseOptimizedKernel<VT1,MT1,VT2> holds.
// Adds the non-zero elements of x in groups of four, then the leftovers one by one.
671 template<
typename VT1
674 static inline typename EnableIf< UseOptimizedKernel<VT1,MT1,VT2> >::Type
675 selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
677 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// M: number of rows of A, i.e. the number of entries of y updated below.
681 const size_t M( A.rows() );
683 ConstIterator element( x.begin() );
684 const ConstIterator end( x.end() );
// jend: x.nonZeros() rounded down to a multiple of four (checked by the assertion).
686 const size_t jend( x.nonZeros() & size_t(-4) );
687 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// Full groups of four elements.
689 for(
size_t j=0UL; (j+4UL)<=jend; j+=4UL )
// Cache index and value of each of the four elements.
// NOTE(review): the iterator advances between these reads are not visible here.
691 const size_t j1( element->index() );
692 const VET v1( element->value() );
694 const size_t j2( element->index() );
695 const VET v2( element->value() );
697 const size_t j3( element->index() );
698 const VET v3( element->value() );
700 const size_t j4( element->index() );
701 const VET v4( element->value() );
// One pass over y adds the combined contribution of the four columns.
704 for(
size_t i=0UL; i<M; ++i ) {
705 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements (fewer than four): add one column at a time.
708 for( ; element!=end; ++element )
710 const size_t j1( element->index() );
711 const VET v1( element->value() );
713 for(
size_t i=0UL; i<M; ++i ) {
714 y[i] += A(i,j1) * v1;
// Addition-assignment kernel enabled when UseVectorizedKernel<VT1,MT1,VT2> holds.
// Same grouping as the optimized kernel, but y is updated via load()/store() in steps of
// IT::size and A is read via A.get().
735 template<
typename VT1
738 static inline typename EnableIf< UseVectorizedKernel<VT1,MT1,VT2> >::Type
739 selectAddAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// IT: intrinsic traits of the element type; IT::size is the step width of the i-loops.
741 typedef IntrinsicTrait<ElementType> IT;
742 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
746 const size_t M( A.rows() );
748 ConstIterator element( x.begin() );
749 const ConstIterator end( x.end() );
// jend: x.nonZeros() rounded down to a multiple of four (checked by the assertion).
751 const size_t jend( x.nonZeros() & size_t(-4) );
752 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// Full groups of four elements.
754 for(
size_t j=0UL; (j+4UL)<=jend; j+=4UL )
// NOTE(review): the definitions of v1..v4 and the iterator advances are not visible here.
756 const size_t j1( element->index() );
759 const size_t j2( element->index() );
762 const size_t j3( element->index() );
765 const size_t j4( element->index() );
// Read-modify-write per block of y: load, add the four columns, store.
// NOTE(review): the loop steps by IT::size with bound i<M; padding of y/A for a final
// partial block cannot be verified from this excerpt - confirm.
769 for(
size_t i=0UL; i<M; i+=IT::size ) {
770 store( &y[i],
load( &y[i] ) + A.get(i,j1) * v1 + A.get(i,j2) * v2 + A.get(i,j3) * v3 + A.get(i,j4) * v4 );
// Leftover elements: add one column at a time.
773 for( ; element!=end; ++element )
775 const size_t j1( element->index() );
778 for(
size_t i=0UL; i<M; i+=IT::size ) {
779 store( &y[i],
load( &y[i] ) + A.get(i,j1) * v1 );
// Fragment of the subtraction-assignment entry point that dispatches to
// selectSubAssignKernel (the function signature is not visible in this excerpt).
803 template<
typename VT1 >
810 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// Nothing to subtract when x has no non-zero elements.
814 if( x.nonZeros() == 0UL )
return;
// Forward the target (~lhs) and the operands to the kernel overload selected via EnableIf.
826 TDMatSVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
// Subtraction-assignment kernel enabled when UseDefaultKernel<VT1,MT1,VT2> holds.
// Subtracts the contribution of every non-zero element of x from y, one element at a time.
845 template<
typename VT1
848 static inline typename EnableIf< UseDefaultKernel<VT1,MT1,VT2> >::Type
849 selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
851 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// M: number of rows of A, i.e. the number of entries of y updated below.
855 const size_t M( A.rows() );
857 ConstIterator element( x.begin() );
858 const ConstIterator end( x.end() );
// Outer loop over the non-zero elements of x, inner loop over the rows of A.
860 for( ; element!=end; ++element ) {
861 for(
size_t i=0UL; i<M; ++i ) {
862 y[i] -= A(i,element->index()) * element->value();
// Subtraction-assignment kernel enabled when UseOptimizedKernel<VT1,MT1,VT2> holds.
// Subtracts the non-zero elements of x in groups of four, then the leftovers one by one.
883 template<
typename VT1
886 static inline typename EnableIf< UseOptimizedKernel<VT1,MT1,VT2> >::Type
887 selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
889 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
// M: number of rows of A, i.e. the number of entries of y updated below.
893 const size_t M( A.rows() );
895 ConstIterator element( x.begin() );
896 const ConstIterator end( x.end() );
// jend: x.nonZeros() rounded down to a multiple of four (checked by the assertion).
898 const size_t jend( x.nonZeros() & size_t(-4) );
899 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// Full groups of four elements.
901 for(
size_t j=0UL; (j+4UL)<=jend; j+=4UL )
// Cache index and value of each of the four elements.
// NOTE(review): the iterator advances between these reads are not visible here.
903 const size_t j1( element->index() );
904 const VET v1( element->value() );
906 const size_t j2( element->index() );
907 const VET v2( element->value() );
909 const size_t j3( element->index() );
910 const VET v3( element->value() );
912 const size_t j4( element->index() );
913 const VET v4( element->value() );
// One pass over y subtracts the summed contribution of the four columns.
916 for(
size_t i=0UL; i<M; ++i ) {
917 y[i] -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
// Leftover elements (fewer than four): subtract one column at a time.
920 for( ; element!=end; ++element )
922 const size_t j1( element->index() );
923 const VET v1( element->value() );
925 for(
size_t i=0UL; i<M; ++i ) {
926 y[i] -= A(i,j1) * v1;
// Subtraction-assignment kernel enabled when UseVectorizedKernel<VT1,MT1,VT2> holds.
// Same grouping as the optimized kernel, but y is updated via load()/store() in steps of
// IT::size and A is read via A.get().
947 template<
typename VT1
950 static inline typename EnableIf< UseVectorizedKernel<VT1,MT1,VT2> >::Type
951 selectSubAssignKernel( VT1& y,
const MT1& A,
const VT2& x )
// IT: intrinsic traits of the element type; IT::size is the step width of the i-loops.
953 typedef IntrinsicTrait<ElementType> IT;
954 typedef typename RemoveReference<RT>::Type::ConstIterator ConstIterator;
958 const size_t M( A.rows() );
960 ConstIterator element( x.begin() );
961 const ConstIterator end( x.end() );
// jend: x.nonZeros() rounded down to a multiple of four (checked by the assertion).
963 const size_t jend( x.nonZeros() & size_t(-4) );
964 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == jend,
"Invalid end calculation" );
// Full groups of four elements.
966 for(
size_t j=0UL; (j+4UL)<=jend; j+=4UL )
// NOTE(review): the definitions of v1..v4 and the iterator advances are not visible here.
968 const size_t j1( element->index() );
971 const size_t j2( element->index() );
974 const size_t j3( element->index() );
977 const size_t j4( element->index() );
// Read-modify-write per block of y: load, subtract the four columns, store.
// NOTE(review): the loop steps by IT::size with bound i<M; padding of y/A for a final
// partial block cannot be verified from this excerpt - confirm.
981 for(
size_t i=0UL; i<M; i+=IT::size ) {
982 store( &y[i],
load( &y[i] ) - A.get(i,j1) * v1 - A.get(i,j2) * v2 - A.get(i,j3) * v3 - A.get(i,j4) * v4 );
// Leftover elements: subtract one column at a time.
985 for( ; element!=end; ++element )
987 const size_t j1( element->index() );
990 for(
size_t i=0UL; i<M; i+=IT::size ) {
991 store( &y[i],
load( &y[i] ) - A.get(i,j1) * v1 );
// Template head of a definition whose body is not visible in this excerpt.
1015 template<
typename VT1 >
// Fragment of the free function that creates the expression object (its name and
// parameter list are not visible here). Per the return type, the overload is disabled
// when T1 satisfies IsMatMatMultExpr; otherwise it yields a const TDMatSVecMultExpr<T1,T2>.
1088 template<
typename T1
1090 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatSVecMultExpr<T1,T2> >::Type
// Size check: the number of matrix columns must equal the vector size; otherwise
// std::invalid_argument is thrown.
1095 if( (~mat).
columns() != (~vec).size() )
1096 throw std::invalid_argument(
"Matrix and vector sizes do not match" );