22 #ifndef _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TSVECDMATMULTEXPR_H_
// Expression object for the multiplication of a sparse vector operand (VT)
// with a matrix operand (MT; a dense matrix according to the class name).
// It is publicly a DenseVector whose second template argument is true and
// privately derives from TVecMatMultExpr.
77 class TSVecDMatMultExpr :
public DenseVector< TSVecDMatMultExpr<VT,MT>, true >
78 ,
private TVecMatMultExpr
// Abbreviations for the operand types used by the members of this class:
//   VRT, MRT - ResultType of the vector / matrix operand
//   VET, MET - ElementType of VRT / MRT
//   VCT, MCT - CompositeType of the vector / matrix operand
83 typedef typename VT::ResultType
VRT;
84 typedef typename MT::ResultType
MRT;
85 typedef typename VRT::ElementType
VET;
86 typedef typename MRT::ElementType
MET;
87 typedef typename VT::CompositeType
VCT;
88 typedef typename MT::CompositeType
MCT;
// Compile-time predicate used to enable the vectorized kernel overloads for
// the type triple (T1, T2, T3). The condition begins with both T1 and T3
// being vectorizable; the remaining terms of the expression are not visible
// in this excerpt.
103 template<
typename T1,
typename T2,
typename T3 >
104 struct UseVectorizedKernel {
105 enum { value = T1::vectorizable && T3::vectorizable &&
// Compile-time predicate used to enable the optimized (four-way unrolled)
// kernel overloads. It is true when the vectorized kernel is not selected
// and neither the element type of T1 nor VET is resizable.
120 template<
typename T1,
typename T2,
typename T3 >
121 struct UseOptimizedKernel {
122 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
123 !IsResizable<typename T1::ElementType>::value &&
124 !IsResizable<VET>::value };
// Compile-time predicate for the fallback case: true exactly when neither
// the vectorized nor the optimized kernel is selected for (T1, T2, T3).
134 template<
typename T1,
typename T2,
typename T3 >
135 struct UseDefaultKernel {
136 enum { value = !UseVectorizedKernel<T1,T2,T3>::value &&
137 !UseOptimizedKernel<T1,T2,T3>::value };
// Compile-time flag: the expression itself reports that it is not vectorizable.
167 enum { vectorizable = 0 };
// Accumulates into res the products of the stored elements of x with the
// entries of mat_ in column `index`, using each element's index as the row.
// NOTE(review): x, res and the enclosing function signature are declared
// outside this excerpt; presumably this is the element access operator - confirm.
199 const ConstIterator end( x.end() );
200 ConstIterator element( x.begin() );
// Only compute products when x holds at least one stored element.
203 if( element != end ) {
// The first product initializes res by assignment instead of addition.
204 res = element->value() *
mat_( element->index(), index );
// NOTE(review): the iterator advance between the initialization above and
// this loop is not visible here - verify the first element is not added twice.
206 for( ; element!=end; ++element )
207 res += element->value() *
mat_( element->index(), index );
// The returned value is the number of columns of the matrix operand.
223 return mat_.columns();
// Alias query for an arbitrary type T: yields true if either the vector
// operand or the matrix operand reports being aliased with `alias`.
// NOTE(review): the function name and signature are not visible in this excerpt.
253 template<
typename T >
255 return vec_.isAliased( alias ) ||
mat_.isAliased( alias );
// Second alias query with the same visible body: the result is the logical OR
// of the isAliased() answers of vec_ and mat_ for the given `alias`.
// NOTE(review): name and signature lie outside this excerpt; how this differs
// from the preceding query cannot be determined from here.
265 template<
typename T >
267 return vec_.isAliased( alias ) ||
mat_.isAliased( alias );
// Assignment of the expression to a target vector (template parameter VT2,
// presumably the type of lhs - confirm).
// NOTE(review): the signature and the setup of x and A are not visible here.
289 template<
typename VT2 >
// Special case for an x without stored elements; the handling itself lies
// outside this excerpt.
298 if( x.nonZeros() == 0UL ) {
// Forward to the selectAssignKernel overload enabled for these operand types.
313 TSVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
// Assignment kernel without unrolling or vectorization (presumably the
// default kernel; its enabling condition is not visible in this excerpt).
//   y - target vector, written element by element
//   x - sparse vector operand, traversed via its stored elements
//   A - matrix operand, accessed as A(row,column)
332 template<
typename VT1
336 selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
// Every loop over j below covers all N columns of A.
342 const size_t N( A.columns() );
344 ConstIterator element( x.begin() );
345 const ConstIterator end( x.end() );
// Initialize each y[j] by assignment from the current (first) element of x.
347 for(
size_t j=0UL; j<N; ++j ) {
348 y[j] = element->value() * A(element->index(),j);
// Add the contributions of the elements from the current iterator position on.
// NOTE(review): the advance past the first element is not visible here.
353 for( ; element!=end; ++element ) {
354 for(
size_t j=0UL; j<N; ++j ) {
355 y[j] += element->value() * A(element->index(),j);
// Optimized assignment kernel, enabled via UseOptimizedKernel<VT1,VT2,MT1>.
// It consumes the stored elements of x in groups of four where possible.
//   y - target vector
//   x - sparse vector operand
//   A - matrix operand, accessed as A(row,column)
376 template<
typename VT1
379 static inline typename EnableIf< UseOptimizedKernel<VT1,VT2,MT1> >::Type
380 selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
382 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
386 const size_t N( A.columns() );
388 ConstIterator element( x.begin() );
389 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four
// (the mask clears the two lowest bits); the assertion cross-checks this.
391 const size_t iend( x.nonZeros() & size_t(-4) );
392 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// First group of four: capture index and value of four elements.
// NOTE(review): the iterator advances between the captures and the branch
// that selects this path are not visible in this excerpt.
396 const size_t i1( element->index() );
397 const VET v1( element->value() );
399 const size_t i2( element->index() );
400 const VET v2( element->value() );
402 const size_t i3( element->index() );
403 const VET v3( element->value() );
405 const size_t i4( element->index() );
406 const VET v4( element->value() );
// Initialize y by assignment with the combined contribution of the four elements.
409 for(
size_t j=0UL; j<N; ++j ) {
410 y[j] = v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
// Alternative initialization from a single element (presumably taken when
// fewer than four elements are stored - confirm; loop body not visible).
415 const size_t i1( element->index() );
416 const VET v1( element->value() );
419 for(
size_t j=0UL; j<N; ++j ) {
// Remaining full groups of four. The counter starts at 4 when a first group
// was consumed above (iend>3); otherwise it starts at 1, and since iend is
// then 0 the loop does not execute.
424 for(
size_t i=(iend>3UL)?(4UL):(1UL); (i+4UL)<=iend; i+=4UL )
426 const size_t i1( element->index() );
427 const VET v1( element->value() );
429 const size_t i2( element->index() );
430 const VET v2( element->value() );
432 const size_t i3( element->index() );
433 const VET v3( element->value() );
435 const size_t i4( element->index() );
436 const VET v4( element->value() );
439 for(
size_t j=0UL; j<N; ++j ) {
440 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
// Leftover elements that do not fill a group of four are added one at a time.
443 for( ; element!=end; ++element )
445 const size_t i1( element->index() );
446 const VET v1( element->value() );
448 for(
size_t j=0UL; j<N; ++j ) {
449 y[j] += v1 * A(i1,j);
// Vectorized assignment kernel, enabled via UseVectorizedKernel<VT1,VT2,MT1>.
// Same grouping of the stored elements of x as the scalar unrolled kernel,
// but y is written through store() and A is read through A.get(), with the
// column index advancing by IT::size per step.
//   y - target vector
//   x - sparse vector operand
//   A - matrix operand
// NOTE(review): the definitions of v1..v4 and the iterator advances are on
// lines not visible in this excerpt.
470 template<
typename VT1
473 static inline typename EnableIf< UseVectorizedKernel<VT1,VT2,MT1> >::Type
474 selectAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
476 typedef IntrinsicTrait<ElementType> IT;
477 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
481 const size_t N( A.columns() );
483 ConstIterator element( x.begin() );
484 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four.
486 const size_t iend( x.nonZeros() & size_t(-4) );
487 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// Row indices of the first group of four elements.
491 const size_t i1( element->index() );
494 const size_t i2( element->index() );
497 const size_t i3( element->index() );
500 const size_t i4( element->index() );
// Initialize y with the combined contribution of the four elements.
// NOTE(review): there is no scalar remainder loop for N not divisible by
// IT::size; presumably y and A are padded accordingly - confirm.
504 for(
size_t j=0UL; j<N; j+=IT::size ) {
505 store( &y[j], v1 * A.get(i1,j) + v2 * A.get(i2,j) + v3 * A.get(i3,j) + v4 * A.get(i4,j) );
// Alternative initialization from a single element (presumably taken when
// fewer than four elements are stored - confirm).
510 const size_t i1( element->index() );
514 for(
size_t j=0UL; j<N; j+=IT::size ) {
515 store( &y[j], v1 * A.get(i1,j) );
// Remaining full groups of four. The counter starts at 4 when iend>3;
// otherwise iend is 0 and the loop does not execute.
519 for(
size_t i=(iend>3UL)?(4UL):(1UL); (i+4UL)<=iend; i+=4UL )
521 const size_t i1( element->index() );
524 const size_t i2( element->index() );
527 const size_t i3( element->index() );
530 const size_t i4( element->index() );
// Read-modify-write: load the current y values, add the group's contribution.
534 for(
size_t j=0UL; j<N; j+=IT::size ) {
535 store( &y[j],
load( &y[j] ) + v1 * A.get(i1,j) + v2 * A.get(i2,j) + v3 * A.get(i3,j) + v4 * A.get(i4,j) );
// Leftover elements are added one at a time.
538 for( ; element!=end; ++element )
540 const size_t i1( element->index() );
543 for(
size_t j=0UL; j<N; j+=IT::size ) {
544 store( &y[j],
load( &y[j] ) + v1 * A.get(i1,j) );
563 template<
typename VT2 >
// Addition assignment of the expression to a target vector (template
// parameter VT2, presumably the type of lhs - confirm).
// NOTE(review): the signature and the setup of x and A are not visible here.
591 template<
typename VT2 >
// Nothing to add when x has no stored elements.
600 if( x.nonZeros() == 0UL )
return;
// Forward to the selectAddAssignKernel overload enabled for these operand types.
612 TSVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
// Addition assignment kernel without unrolling or vectorization (presumably
// the default kernel; its enabling condition is not visible in this excerpt).
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand, accessed as A(row,column)
630 template<
typename VT1
634 selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
640 const size_t N( A.columns() );
642 ConstIterator element( x.begin() );
643 const ConstIterator end( x.end() );
// For every stored element of x, add its value times the matching row of A to y.
645 for( ; element!=end; ++element ) {
646 for(
size_t j=0UL; j<N; ++j ) {
647 y[j] += element->value() * A(element->index(),j);
// Optimized addition assignment kernel, enabled via
// UseOptimizedKernel<VT1,VT2,MT1>. Stored elements of x are consumed in
// groups of four, followed by a one-at-a-time loop for the leftovers.
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand, accessed as A(row,column)
668 template<
typename VT1
671 static inline typename EnableIf< UseOptimizedKernel<VT1,VT2,MT1> >::Type
672 selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
674 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
678 const size_t N( A.columns() );
680 ConstIterator element( x.begin() );
681 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four
// (the mask clears the two lowest bits); the assertion cross-checks this.
683 const size_t iend( x.nonZeros() & size_t(-4) );
684 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// Full groups of four elements.
// NOTE(review): the iterator advances between the captures are not visible here.
686 for(
size_t i=0UL; (i+4UL)<=iend; i+=4UL )
688 const size_t i1( element->index() );
689 const VET v1( element->value() );
691 const size_t i2( element->index() );
692 const VET v2( element->value() );
694 const size_t i3( element->index() );
695 const VET v3( element->value() );
697 const size_t i4( element->index() );
698 const VET v4( element->value() );
701 for(
size_t j=0UL; j<N; ++j ) {
702 y[j] += v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
// Leftover elements that do not fill a group of four.
705 for( ; element!=end; ++element )
707 const size_t i1( element->index() );
708 const VET v1( element->value() );
710 for(
size_t j=0UL; j<N; ++j ) {
711 y[j] += v1 * A(i1,j);
// Vectorized addition assignment kernel, enabled via
// UseVectorizedKernel<VT1,VT2,MT1>. y is updated through load()/store() and
// A is read through A.get(), with the column index advancing by IT::size.
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand
// NOTE(review): the definitions of v1..v4 and the iterator advances are on
// lines not visible in this excerpt.
732 template<
typename VT1
735 static inline typename EnableIf< UseVectorizedKernel<VT1,VT2,MT1> >::Type
736 selectAddAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
738 typedef IntrinsicTrait<ElementType> IT;
739 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
743 const size_t N( A.columns() );
745 ConstIterator element( x.begin() );
746 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four.
748 const size_t iend( x.nonZeros() & size_t(-4) );
749 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// Full groups of four elements.
751 for(
size_t i=0UL; (i+4UL)<=iend; i+=4UL )
753 const size_t i1( element->index() );
756 const size_t i2( element->index() );
759 const size_t i3( element->index() );
762 const size_t i4( element->index() );
// Read-modify-write: load the current y values, add the group's contribution.
// NOTE(review): no scalar remainder loop for N not divisible by IT::size;
// presumably y and A are padded accordingly - confirm.
766 for(
size_t j=0UL; j<N; j+=IT::size ) {
767 store( &y[j],
load( &y[j] ) + v1 * A.get(i1,j) + v2 * A.get(i2,j) + v3 * A.get(i3,j) + v4 * A.get(i4,j) );
// Leftover elements are added one at a time.
770 for( ; element!=end; ++element )
772 const size_t i1( element->index() );
775 for(
size_t j=0UL; j<N; j+=IT::size ) {
776 store( &y[j],
load( &y[j] ) + v1 * A.get(i1,j) );
// Subtraction assignment of the expression to a target vector (template
// parameter VT2, presumably the type of lhs - confirm).
// NOTE(review): the signature and the setup of x and A are not visible here.
798 template<
typename VT2 >
// Nothing to subtract when x has no stored elements.
809 if( x.nonZeros() == 0UL )
return;
// Forward to the selectSubAssignKernel overload enabled for these operand types.
821 TSVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
// Subtraction assignment kernel without unrolling or vectorization
// (presumably the default kernel; its enabling condition is not visible in
// this excerpt).
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand, accessed as A(row,column)
839 template<
typename VT1
843 selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
849 const size_t N( A.columns() );
851 ConstIterator element( x.begin() );
852 const ConstIterator end( x.end() );
// For every stored element of x, subtract its value times the matching row of A from y.
854 for( ; element!=end; ++element ) {
855 for(
size_t j=0UL; j<N; ++j ) {
856 y[j] -= element->value() * A(element->index(),j);
// Optimized subtraction assignment kernel, enabled via
// UseOptimizedKernel<VT1,VT2,MT1>. Stored elements of x are consumed in
// groups of four, followed by a one-at-a-time loop for the leftovers.
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand, accessed as A(row,column)
877 template<
typename VT1
880 static inline typename EnableIf< UseOptimizedKernel<VT1,VT2,MT1> >::Type
881 selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
883 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
887 const size_t N( A.columns() );
889 ConstIterator element( x.begin() );
890 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four
// (the mask clears the two lowest bits); the assertion cross-checks this.
892 const size_t iend( x.nonZeros() & size_t(-4) );
893 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// Full groups of four elements.
// NOTE(review): the iterator advances between the captures are not visible here.
895 for(
size_t i=0UL; (i+4UL)<=iend; i+=4UL )
897 const size_t i1( element->index() );
898 const VET v1( element->value() );
900 const size_t i2( element->index() );
901 const VET v2( element->value() );
903 const size_t i3( element->index() );
904 const VET v3( element->value() );
906 const size_t i4( element->index() );
907 const VET v4( element->value() );
// The four contributions are summed first and then subtracted from y[j].
910 for(
size_t j=0UL; j<N; ++j ) {
911 y[j] -= v1 * A(i1,j) + v2 * A(i2,j) + v3 * A(i3,j) + v4 * A(i4,j);
// Leftover elements that do not fill a group of four.
914 for( ; element!=end; ++element )
916 const size_t i1( element->index() );
917 const VET v1( element->value() );
919 for(
size_t j=0UL; j<N; ++j ) {
920 y[j] -= v1 * A(i1,j);
// Vectorized subtraction assignment kernel, enabled via
// UseVectorizedKernel<VT1,VT2,MT1>. y is updated through load()/store() and
// A is read through A.get(), with the column index advancing by IT::size.
//   y - target vector, updated in place
//   x - sparse vector operand
//   A - matrix operand
// NOTE(review): the definitions of v1..v4 and the iterator advances are on
// lines not visible in this excerpt.
941 template<
typename VT1
944 static inline typename EnableIf< UseVectorizedKernel<VT1,VT2,MT1> >::Type
945 selectSubAssignKernel( VT1& y,
const VT2& x,
const MT1& A )
947 typedef IntrinsicTrait<ElementType> IT;
948 typedef typename RemoveReference<LT>::Type::ConstIterator ConstIterator;
952 const size_t N( A.columns() );
954 ConstIterator element( x.begin() );
955 const ConstIterator end( x.end() );
// iend is the number of stored elements rounded down to a multiple of four.
957 const size_t iend( x.nonZeros() & size_t(-4) );
958 BLAZE_INTERNAL_ASSERT( ( x.nonZeros() - ( x.nonZeros() % 4UL ) ) == iend,
"Invalid end calculation" );
// Full groups of four elements.
960 for(
size_t i=0UL; (i+4UL)<=iend; i+=4UL )
962 const size_t i1( element->index() );
965 const size_t i2( element->index() );
968 const size_t i3( element->index() );
971 const size_t i4( element->index() );
// Read-modify-write: load the current y values, subtract the group's contribution.
// NOTE(review): no scalar remainder loop for N not divisible by IT::size;
// presumably y and A are padded accordingly - confirm.
975 for(
size_t j=0UL; j<N; j+=IT::size ) {
976 store( &y[j],
load( &y[j] ) - v1 * A.get(i1,j) - v2 * A.get(i2,j) - v3 * A.get(i3,j) - v4 * A.get(i4,j) );
// Leftover elements are subtracted one at a time. The loop compares against
// the cached end iterator, like the other kernels in this file, instead of
// calling x.end() on every iteration.
979 for( ; element!=end; ++element )
981 const size_t i1( element->index() );
984 for(
size_t j=0UL; j<N; j+=IT::size ) {
985 store( &y[j],
load( &y[j] ) - v1 * A.get(i1,j) );
1007 template<
typename VT2 >
// Multiplication operator producing a TSVecDMatMultExpr<T1,T2>. The overload
// is disabled (via DisableIf) when T2 is a matrix-matrix multiplication
// expression.
// Throws std::invalid_argument if the size of the vector differs from the
// number of rows of the matrix.
// NOTE(review): the parameter list and the return statement are not visible
// in this excerpt.
1078 template<
typename T1,
typename T2 >
1079 inline const typename DisableIf< IsMatMatMultExpr<T2>, TSVecDMatMultExpr<T1,T2> >::Type
// Reject operands whose dimensions do not fit before building the expression.
1084 if( (~vec).size() != (~mat).
rows() )
1085 throw std::invalid_argument(
"Vector and matrix sizes do not match" );
// Multiplication operator overload enabled (via EnableIf) when T2 is a
// matrix-matrix multiplication expression; the result type is taken from
// MultExprTrait<T1,T2>.
// NOTE(review): the parameter list is not visible in this excerpt.
1113 template<
typename T1
1116 inline const typename EnableIf< IsMatMatMultExpr<T2>, MultExprTrait<T1,T2> >::Type::Type
// Re-associate vec * (L * R) as (vec * L) * R, so the vector is multiplied
// with the left operand first and that result with the right operand.
1121 return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();