22 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECTDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DVECTDVECMULTEXPR_H_
// Fragment of a class template head; the remaining template parameters and the class
// line itself are not part of this excerpt (presumably DVecTDVecMultExpr -- TODO confirm).
70 template<
typename VT1
// Short aliases for the nested types of the two operand types VT1 and VT2.
// RT1 / RT2: ResultType of each operand.
78 typedef typename VT1::ResultType
RT1;
79 typedef typename VT2::ResultType
RT2;
// RN1 / RN2: ReturnType of each operand.
80 typedef typename VT1::ReturnType
RN1;
81 typedef typename VT2::ReturnType
RN2;
// CT1 / CT2: CompositeType of each operand.
82 typedef typename VT1::CompositeType
CT1;
83 typedef typename VT2::CompositeType
CT2;
// ET1 / ET2: ElementType of each operand.
84 typedef typename VT1::ElementType
ET1;
85 typedef typename VT2::ElementType
ET2;
// Helper with a single template parameter VT whose `value` simply forwards the
// `useAssign` constant (declared on a line outside this excerpt).
// NOTE(review): presumably the body of the UseAssign<> helper that appears in the
// EnableIf conditions further down; the struct line itself is not visible -- confirm.
113 template<
typename VT >
115 enum { value = useAssign };
// Compile-time predicate over three types. `value` requires the `vectorizable` flag of
// T1, T2 and T3 to be set, combined with further conditions that are cut off in this
// excerpt. It is the EnableIf condition of the vectorized kernel overloads below.
125 template<
typename T1,
typename T2,
typename T3 >
126 struct UseVectorizedKernel {
127 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
// Compile-time predicate that is the exact negation of UseVectorizedKernel for the same
// three types. Since the default and vectorized kernel overloads are guarded by these two
// complementary predicates, at most one of each overload pair is enabled for a given
// type triple.
140 template<
typename T1,
typename T2,
typename T3 >
141 struct UseDefaultKernel {
142 enum { value = !UseVectorizedKernel<T1,T2,T3>::value };
// Compilation flag `vectorizable`: requires both operand types VT1 and VT2 to be
// vectorizable; the remaining conditions of the expression are cut off in this excerpt.
177 enum { vectorizable = VT1::vectorizable && VT2::vectorizable &&
// Intrinsic-typed (IntrinsicType) access for the index pair (i,j).
// Only the setup of the two operands is visible here; the statement that combines them
// and the return are outside this excerpt.
216 inline IntrinsicType
get(
size_t i,
size_t j )
const {
// xmm1: element i of lhs_, passed through set().
// NOTE(review): presumably set() replicates the scalar into every lane -- confirm.
221 const IntrinsicType xmm1(
set(
lhs_[i] ) );
// xmm2: intrinsic value obtained from rhs_ at index j.
222 const IntrinsicType xmm2(
rhs_.get( j ) );
// Fragment of a member template (signature not visible; presumably canAlias -- confirm).
// Reports true if either operand answers true to canAlias() for the given `alias`.
273 template<
typename T >
275 return (
lhs_.canAlias( alias ) ||
rhs_.canAlias( alias ) );
// Fragment of a member template (signature not visible; presumably isAliased -- confirm).
// Reports true if either operand answers true to isAliased() for the given `alias`.
285 template<
typename T >
287 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Fragment of an assignment function templated on the target type MT (signature and
// setup lines are not visible in this excerpt).
// The visible statement delegates the work to the selectAssignKernel() overload set,
// passing the result of operator~ on `lhs` together with `x` and `y`, both of which are
// defined on lines not shown here.
311 template<
typename MT >
328 DVecTDVecMultExpr::selectAssignKernel( ~lhs, x, y );
// Default (non-vectorized) assignment kernel: A(i,j) = x[i] * y[j].
// The signature lines are not part of this excerpt; A, x and y are the kernel arguments.
// The outer loop walks the rows of A, the inner loop the columns, two columns per step.
347 template<
typename MT
// m / n: number of rows / columns of the target matrix.
353 const size_t m( (~A).
rows() );
354 const size_t n( (~A).
columns() );
// jend: n with its lowest bit cleared, i.e. the largest even number <= n. This is the
// upper bound of the two-columns-at-a-time loop.
356 const size_t jend( n &
size_t(-2) );
359 for(
size_t i=0UL; i<m; ++i ) {
360 for(
size_t j=0UL; j<jend; j+=2UL ) {
361 (~A)(i,j ) = x[i] * y[j ];
// Index written as j+1UL (not j+1) so that the size_t arithmetic uses the same literal
// type as the left-hand side and as the add-/sub-assignment kernels.
362 (~A)(i,j+1UL) = x[i] * y[j+1UL];
// Leftover column at index jend. NOTE(review): the guard for odd n (presumably
// `if( jend < n )`) sits on a line that is not visible in this excerpt -- confirm.
365 (~A)(i,jend) = x[i] * y[jend];
// Vectorized assignment kernel for a DenseMatrix<MT,false> target: A(i,j) = x[i] * y[j].
// Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
// NOTE(review): the second DenseMatrix argument `false` presumably denotes row-major
// storage -- confirm against the DenseMatrix declaration.
386 template<
typename MT
389 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
390 selectAssignKernel( DenseMatrix<MT,false>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
392 typedef IntrinsicTrait<ElementType> IT;
394 const size_t m( (~A).
rows() );
395 const size_t n( (~A).
columns() );
// One pass per row i of A.
397 for(
size_t i=0UL; i<m; ++i )
// x1: x[i] passed through set(); reused for the whole row.
399 const IntrinsicType x1(
set( x[i] ) );
// The loop advances IT::size columns per iteration and has no scalar remainder step.
// NOTE(review): for n not a multiple of IT::size this writes past column n-1, so it
// presumably relies on padded row storage -- confirm.
401 for(
size_t j=0UL; j<n; j+=IT::size ) {
402 store( &(~A)(i,j), x1 * y.get(j) );
// Fragment of a second assignment function templated on the target type MT (signature
// and setup lines are not visible in this excerpt).
// The visible statement delegates to the selectAssignKernel() overload set with the
// result of operator~ on `lhs` and with `x` and `y`, which are defined on lines not shown.
421 template<
typename MT >
437 DVecTDVecMultExpr::selectAssignKernel( ~lhs, x, y );
// Default (non-vectorized) assignment kernel for a DenseMatrix<MT,true> target:
// A(i,j) = x[i] * y[j]. Only enabled when UseDefaultKernel<MT,VT3,VT4> holds.
// The outer loop walks the columns of A, the inner loop the rows, two rows per step.
// NOTE(review): the second DenseMatrix argument `true` presumably denotes column-major
// storage -- confirm against the DenseMatrix declaration.
456 template<
typename MT
459 static inline typename EnableIf< UseDefaultKernel<MT,VT3,VT4> >::Type
460 selectAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// m / n: number of rows / columns of the target matrix.
462 const size_t m( (~A).
rows() );
463 const size_t n( (~A).
columns() );
// iend: m with its lowest bit cleared, i.e. the largest even number <= m. This is the
// upper bound of the two-rows-at-a-time loop.
465 const size_t iend( m &
size_t(-2) );
468 for(
size_t j=0UL; j<n; ++j ) {
469 for(
size_t i=0UL; i<iend; i+=2UL ) {
470 (~A)(i ,j) = x[i ] * y[j];
// Index written as i+1UL (not i+1) so that the size_t arithmetic uses the same literal
// type as the left-hand side and as the add-/sub-assignment kernels.
471 (~A)(i+1UL,j) = x[i+1UL] * y[j];
// Leftover row at index iend. NOTE(review): the guard for odd m (presumably
// `if( iend < m )`) sits on a line that is not visible in this excerpt -- confirm.
474 (~A)(iend,j) = x[iend] * y[j];
// Vectorized assignment kernel for a DenseMatrix<MT,true> target: A(i,j) = x[i] * y[j].
// Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
// NOTE(review): the second DenseMatrix argument `true` presumably denotes column-major
// storage -- confirm against the DenseMatrix declaration.
495 template<
typename MT
498 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
499 selectAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
501 typedef IntrinsicTrait<ElementType> IT;
503 const size_t m( (~A).
rows() );
504 const size_t n( (~A).
columns() );
// One pass per column j of A.
506 for(
size_t j=0UL; j<n; ++j )
// y1: y[j] passed through set(); reused for the whole column.
508 const IntrinsicType y1(
set( y[j] ) );
// The loop advances IT::size rows per iteration and has no scalar remainder step.
// NOTE(review): for m not a multiple of IT::size this writes past row m-1, so it
// presumably relies on padded column storage -- confirm.
510 for(
size_t i=0UL; i<m; i+=IT::size ) {
511 store( &(~A)(i,j), x.get(i) * y1 );
// Fragment of an assignment function that goes through a temporary (signature and the
// remaining statements are not visible in this excerpt).
530 template<
typename MT
// TmpType is chosen between OppositeType and ResultType depending on SO.
// NOTE(review): presumably SelectType yields its first alternative (OppositeType) when
// SO is true -- confirm against the SelectType definition.
536 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// Materialize `rhs` into a temporary of that type.
548 const TmpType tmp( rhs );
// Fragment of a friend function templated on the target type MT and only enabled when
// UseAssign<MT> holds (function name and parameter list are not visible here;
// presumably addAssign -- confirm).
// The visible statement delegates to the selectAddAssignKernel() overload set with the
// result of operator~ on `lhs` and with `x` and `y`, which are defined on lines not shown.
569 template<
typename MT >
570 friend inline typename EnableIf< UseAssign<MT> >::Type
586 DVecTDVecMultExpr::selectAddAssignKernel( ~lhs, x, y );
// Default (non-vectorized) addition-assignment kernel for a DenseMatrix<MT,false> target:
// A(i,j) += x[i] * y[j]. Only enabled when UseDefaultKernel<MT,VT3,VT4> holds.
// The outer loop walks the rows of A, the inner loop the columns, two columns per step.
605 template<
typename MT
608 static inline typename EnableIf< UseDefaultKernel<MT,VT3,VT4> >::Type
609 selectAddAssignKernel( DenseMatrix<MT,false>& A,
const VT3& x,
const VT4& y )
// m / n: number of rows / columns of the target matrix.
611 const size_t m( (~A).
rows() );
612 const size_t n( (~A).
columns() );
// jend: n with its lowest bit cleared, i.e. the largest even number <= n.
614 const size_t jend( n &
size_t(-2) );
617 for(
size_t i=0UL; i<m; ++i ) {
618 for(
size_t j=0UL; j<jend; j+=2UL ) {
619 (~A)(i,j ) += x[i] * y[j ];
620 (~A)(i,j+1UL) += x[i] * y[j+1UL];
// Leftover column at index jend. NOTE(review): the guard for odd n (presumably
// `if( jend < n )`) sits on a line that is not visible in this excerpt -- confirm.
623 (~A)(i,jend) += x[i] * y[jend];
// Vectorized addition-assignment kernel for a DenseMatrix<MT,false> target:
// A(i,j) += x[i] * y[j]. Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
644 template<
typename MT
647 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
648 selectAddAssignKernel( DenseMatrix<MT,false>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
650 typedef IntrinsicTrait<ElementType> IT;
652 const size_t m( (~A).
rows() );
653 const size_t n( (~A).
columns() );
// One pass per row i of A.
655 for(
size_t i=0UL; i<m; ++i )
// x1: x[i] passed through set(); reused for the whole row.
657 const IntrinsicType x1(
set( x[i] ) );
// Read-modify-write: load the current values at A(i,j), add the product, store back.
// NOTE(review): no scalar remainder step; presumably relies on padded row storage
// when n is not a multiple of IT::size -- confirm.
659 for(
size_t j=0UL; j<n; j+=IT::size ) {
660 store( &(~A)(i,j),
load( &(~A)(i,j) ) + x1 * y.get(j) );
// Fragment of a second function templated on the target type MT that performs an
// addition assignment (signature and setup lines are not visible in this excerpt).
// The visible statement delegates to the selectAddAssignKernel() overload set with the
// result of operator~ on `lhs` and with `x` and `y`, which are defined on lines not shown.
680 template<
typename MT >
696 DVecTDVecMultExpr::selectAddAssignKernel( ~lhs, x, y );
// Default (non-vectorized) addition-assignment kernel for a DenseMatrix<MT,true> target:
// A(i,j) += x[i] * y[j]. Only enabled when UseDefaultKernel<MT,VT3,VT4> holds.
// The outer loop walks the columns of A, the inner loop the rows, two rows per step.
715 template<
typename MT
718 static inline typename EnableIf< UseDefaultKernel<MT,VT3,VT4> >::Type
719 selectAddAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// m / n: number of rows / columns of the target matrix.
721 const size_t m( (~A).
rows() );
722 const size_t n( (~A).
columns() );
// iend: m with its lowest bit cleared, i.e. the largest even number <= m.
724 const size_t iend( m &
size_t(-2) );
727 for(
size_t j=0UL; j<n; ++j ) {
728 for(
size_t i=0UL; i<iend; i+=2UL ) {
729 (~A)(i ,j) += x[i ] * y[j];
730 (~A)(i+1UL,j) += x[i+1UL] * y[j];
// Leftover row at index iend. NOTE(review): the guard for odd m (presumably
// `if( iend < m )`) sits on a line that is not visible in this excerpt -- confirm.
733 (~A)(iend,j) += x[iend] * y[j];
// Vectorized addition-assignment kernel for a DenseMatrix<MT,true> target:
// A(i,j) += x[i] * y[j]. Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
754 template<
typename MT
757 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
758 selectAddAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
760 typedef IntrinsicTrait<ElementType> IT;
762 const size_t m( (~A).
rows() );
763 const size_t n( (~A).
columns() );
// One pass per column j of A.
765 for(
size_t j=0UL; j<n; ++j )
// y1: y[j] passed through set(); reused for the whole column.
767 const IntrinsicType y1(
set( y[j] ) );
// Read-modify-write: load the current values at A(i,j), add the product, store back.
// NOTE(review): no scalar remainder step; presumably relies on padded column storage
// when m is not a multiple of IT::size -- confirm.
769 for(
size_t i=0UL; i<m; i+=IT::size ) {
770 store( &(~A)(i,j),
load( &(~A)(i,j) ) + x.get(i) * y1 );
// Fragment of a friend function templated on the target type MT and only enabled when
// UseAssign<MT> holds (function name and parameter list are not visible here;
// presumably subAssign -- confirm).
// The visible statement delegates to the selectSubAssignKernel() overload set with the
// result of operator~ on `lhs` and with `x` and `y`, which are defined on lines not shown.
796 template<
typename MT >
797 friend inline typename EnableIf< UseAssign<MT> >::Type
813 DVecTDVecMultExpr::selectSubAssignKernel( ~lhs, x, y );
// Default (non-vectorized) subtraction-assignment kernel for a DenseMatrix<MT,false>
// target: A(i,j) -= x[i] * y[j]. Only enabled when UseDefaultKernel<MT,VT3,VT4> holds.
// The outer loop walks the rows of A, the inner loop the columns, two columns per step.
832 template<
typename MT
835 static inline typename EnableIf< UseDefaultKernel<MT,VT3,VT4> >::Type
836 selectSubAssignKernel( DenseMatrix<MT,false>& A,
const VT3& x,
const VT4& y )
// m / n: number of rows / columns of the target matrix.
838 const size_t m( (~A).
rows() );
839 const size_t n( (~A).
columns() );
// jend: n with its lowest bit cleared, i.e. the largest even number <= n.
841 const size_t jend( n &
size_t(-2) );
844 for(
size_t i=0UL; i<m; ++i ) {
845 for(
size_t j=0UL; j<jend; j+=2UL ) {
846 (~A)(i,j ) -= x[i] * y[j ];
847 (~A)(i,j+1UL) -= x[i] * y[j+1UL];
// Leftover column at index jend. NOTE(review): the guard for odd n (presumably
// `if( jend < n )`) sits on a line that is not visible in this excerpt -- confirm.
850 (~A)(i,jend) -= x[i] * y[jend];
// Vectorized subtraction-assignment kernel for a DenseMatrix<MT,false> target:
// A(i,j) -= x[i] * y[j]. Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
871 template<
typename MT
874 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
875 selectSubAssignKernel( DenseMatrix<MT,false>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
877 typedef IntrinsicTrait<ElementType> IT;
879 const size_t m( (~A).
rows() );
880 const size_t n( (~A).
columns() );
// One pass per row i of A.
882 for(
size_t i=0UL; i<m; ++i )
// x1: x[i] passed through set(); reused for the whole row.
884 const IntrinsicType x1(
set( x[i] ) );
// Read-modify-write: load the current values at A(i,j), subtract the product, store back.
// NOTE(review): no scalar remainder step; presumably relies on padded row storage
// when n is not a multiple of IT::size -- confirm.
886 for(
size_t j=0UL; j<n; j+=IT::size ) {
887 store( &(~A)(i,j),
load( &(~A)(i,j) ) - x1 * y.get(j) );
// Fragment of a second function templated on the target type MT that performs a
// subtraction assignment (signature and setup lines are not visible in this excerpt).
// The visible statement delegates to the selectSubAssignKernel() overload set with the
// result of operator~ on `lhs` and with `x` and `y`, which are defined on lines not shown.
907 template<
typename MT >
923 DVecTDVecMultExpr::selectSubAssignKernel( ~lhs, x, y );
// Default (non-vectorized) subtraction-assignment kernel for a DenseMatrix<MT,true>
// target: A(i,j) -= x[i] * y[j]. Only enabled when UseDefaultKernel<MT,VT3,VT4> holds.
// The outer loop walks the columns of A, the inner loop the rows, two rows per step.
942 template<
typename MT
945 static inline typename EnableIf< UseDefaultKernel<MT,VT3,VT4> >::Type
946 selectSubAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// m / n: number of rows / columns of the target matrix.
948 const size_t m( (~A).
rows() );
949 const size_t n( (~A).
columns() );
// iend: m with its lowest bit cleared, i.e. the largest even number <= m.
951 const size_t iend( m &
size_t(-2) );
954 for(
size_t j=0UL; j<n; ++j ) {
955 for(
size_t i=0UL; i<iend; i+=2UL ) {
956 (~A)(i ,j) -= x[i ] * y[j];
957 (~A)(i+1UL,j) -= x[i+1UL] * y[j];
// Leftover row at index iend. NOTE(review): the guard for odd m (presumably
// `if( iend < m )`) sits on a line that is not visible in this excerpt -- confirm.
960 (~A)(iend,j) -= x[iend] * y[j];
// Vectorized subtraction-assignment kernel for a DenseMatrix<MT,true> target:
// A(i,j) -= x[i] * y[j]. Only enabled when UseVectorizedKernel<MT,VT3,VT4> holds.
981 template<
typename MT
984 static inline typename EnableIf< UseVectorizedKernel<MT,VT3,VT4> >::Type
985 selectSubAssignKernel( DenseMatrix<MT,true>& A,
const VT3& x,
const VT4& y )
// IT::size is the step width of the inner loop below.
987 typedef IntrinsicTrait<ElementType> IT;
989 const size_t m( (~A).
rows() );
990 const size_t n( (~A).
columns() );
// One pass per column j of A.
992 for(
size_t j=0UL; j<n; ++j )
// y1: y[j] passed through set(); reused for the whole column.
994 const IntrinsicType y1(
set( y[j] ) );
// Read-modify-write: load the current values at A(i,j), subtract the product, store back.
// NOTE(review): no scalar remainder step; presumably relies on padded column storage
// when m is not a multiple of IT::size -- confirm.
996 for(
size_t i=0UL; i<m; i+=IT::size ) {
997 store( &(~A)(i,j),
load( &(~A)(i,j) ) - x.get(i) * y1 );
// Fragment of a free function template that returns a const DVecTDVecMultExpr<T1,T2>
// by value. The rest of the template parameter list, the function name and its body are
// not visible in this excerpt.
// NOTE(review): presumably the multiplication operator that creates the expression
// object -- confirm.
1063 template<
typename T1
1065 inline const DVecTDVecMultExpr<T1,T2>
// Fragment of a trait specialization over VT1 and VT2 (the struct line is not visible
// in this excerpt). Its nested `Type` is the MultExprTrait result for the pair
// (VT1::ReturnType, VT2), i.e. the first operand is replaced by its ReturnType.
1085 template<
typename VT1,
typename VT2 >
1090 typedef typename MultExprTrait< typename VT1::ReturnType, VT2 >::Type Type;
// Fragment of a trait specialization over VT1 and VT2 (the struct line is not visible
// in this excerpt). Its nested `Type` is the MultExprTrait result for the pair
// (VT1, VT2::ReturnType), i.e. the second operand is replaced by its ReturnType.
1099 template<
typename VT1,
typename VT2 >
1104 typedef typename MultExprTrait< VT1, typename VT2::ReturnType >::Type Type;