35#ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
// Compile-time helper: its `value` constant is what the two dvecdvecinner()
// overloads further down test (via DisableIf_t / EnableIf_t) to select the
// scalar or the SIMD kernel for a given pair of vector types VT1, VT2.
// NOTE(review): the `template<...>` header and the braces of this struct are
// not visible in this listing, and the fused listing numbers jump from 89 to
// 92 inside the `value` expression, so further conditions may be missing here.
// Confirm against the original header before relying on this text.
77struct DVecDVecInnerExprHelper
// Composite type of VT1 with any reference qualifier removed.
81 using CT1 = RemoveReference_t< CompositeType_t<VT1> >;
// Composite type of VT2 with any reference qualifier removed.
84 using CT2 = RemoveReference_t< CompositeType_t<VT2> >;
// Conjunction of the visible conditions: optimized kernels are switched on,
// the two element types are SIMD-combinable, and SIMD addition and
// multiplication are available.
88 static constexpr bool value =
89 ( useOptimizedKernels &&
92 IsSIMDCombinable_v< ElementType_t<CT1>, ElementType_t<CT2> > &&
// NOTE(review): both arguments of the next two traits use CT1's element type;
// CT2's element type is not queried. Possibly intentional given the
// IsSIMDCombinable_v check above - TODO confirm.
93 HasSIMDAdd_v< ElementType_t<CT1>, ElementType_t<CT1> > &&
94 HasSIMDMult_v< ElementType_t<CT1>, ElementType_t<CT1> > );
// Scalar kernel for the inner product of two dense vectors.
//
// Parameters:
//   lhs - left operand, a DenseVector<VT1,true>
//   rhs - right operand, a DenseVector<VT2,false>
// Returns:
//   const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> >
//
// This overload takes part in overload resolution only when
// DVecDVecInnerExprHelper<VT1,VT2>::value is false (DisableIf_t).
// NOTE(review): the second template parameter (VT2), the braces, the
// declarations of `left`, `right` and `i`, and the final return statement are
// not visible in this listing. Confirm against the original header.
123template<
typename VT1
125inline auto dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
126 -> DisableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
127 ,
const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
129 using CT1 = CompositeType_t<VT1>;
130 using CT2 = CompositeType_t<VT2>;
131 using ET1 = ElementType_t<VT1>;
132 using ET2 = ElementType_t<VT2>;
// Result type of multiplying one element of each operand.
133 using MultType = MultTrait_t<ET1,ET2>;
// Empty operand: return a default-constructed MultType. This also protects
// the unconditional access to index 0 below.
137 if( (*lhs).size() == 0UL )
return MultType();
// Seed the accumulator with the product of the first elements.
// Presumably `i` therefore starts at 1 - its declaration is not visible here,
// TODO confirm.
142 MultType sp( left[0UL] * right[0UL] );
// Main loop: four element products are added to the accumulator per iteration.
145 for( ; (i+4UL) <= left.size(); i+=4UL ) {
146 sp += left[i ] * right[i ] +
147 left[i+1UL] * right[i+1UL] +
148 left[i+2UL] * right[i+2UL] +
149 left[i+3UL] * right[i+3UL];
// Two element products per iteration for what the loop above left over.
151 for( ; (i+2UL) <= left.size(); i+=2UL ) {
152 sp += left[i ] * right[i ] +
153 left[i+1UL] * right[i+1UL];
// One element per iteration until the end of `left` is reached.
155 for( ; i<left.size(); ++i ) {
156 sp += left[i] * right[i];
// SIMD kernel for the inner product of two dense vectors.
//
// Parameters:
//   lhs - left operand, a DenseVector<VT1,true>
//   rhs - right operand, a DenseVector<VT2,false>
// Returns:
//   const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> >
//
// This overload takes part in overload resolution only when
// DVecDVecInnerExprHelper<VT1,VT2>::value is true (EnableIf_t).
// NOTE(review): the second template parameter (VT2), the braces, the
// declarations of `left`, `right`, `i` and `SIMDSIZE`, and the final return
// statement are not visible in this listing. Confirm against the original
// header.
179template<
typename VT1
181inline auto dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
182 -> EnableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
183 ,
const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
185 using CT1 = CompositeType_t<VT1>;
186 using CT2 = CompositeType_t<VT2>;
// Composite types with the reference removed; used for the IsPadded_v queries.
187 using XT1 = RemoveReference_t<CT1>;
188 using XT2 = RemoveReference_t<CT2>;
189 using ET1 = ElementType_t<VT1>;
190 using ET2 = ElementType_t<VT2>;
// Result type of multiplying one element of each operand.
191 using MultType = MultTrait_t<ET1,ET2>;
// Empty operand: return a default-constructed MultType.
195 if( (*lhs).size() == 0UL )
return MultType();
// A scalar tail loop is needed as soon as one of the operand types is not
// padded.
201 constexpr bool remainder( !IsPadded_v<XT1> || !IsPadded_v<XT2> );
203 const size_t N( left.size() );
// End index of the SIMD part: N rounded down to a multiple of SIMDSIZE when a
// tail loop is needed, otherwise N itself.
205 const size_t ipos( remainder ?
prevMultiple( N, SIMDSIZE ): N );
// Four independent SIMD accumulators.
// NOTE(review): no initializer is visible, yet they are read in the loops
// below; presumably the SIMD type default-constructs to zero - TODO confirm.
208 SIMDTrait_t<MultType> xmm1, xmm2, xmm3, xmm4;
// Four SIMD loads per operand and iteration, one per accumulator.
211 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
212 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
213 xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
214 xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
215 xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
// Two SIMD loads per operand and iteration for what the loop above left over.
217 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
218 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
219 xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
// One SIMD load per operand and iteration up to ipos.
221 for( ; i<ipos; i+=SIMDSIZE ) {
222 xmm1 = xmm1 + ( left.load(i) * right.load(i) );
// Add the four accumulators and fold the result into a scalar via sum().
225 MultType sp(
sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
// Scalar tail over [i, N); the loop is skipped entirely when `remainder` is
// false.
227 for( ; remainder && i<N; ++i ) {
228 sp += left[i] * right[i];
// Entry point that forwards to the dvecdvecinner() kernels.
// NOTE(review): the function name, its parameter list, the rest of the
// template header and the braces are not visible in this listing; presumably
// this is the public inner-product operator - TODO confirm.
263template<
typename VT1
// Size check of the two operands. The statement executed on a mismatch is not
// visible here (presumably an error is raised - TODO confirm).
270 if( (*lhs).size() != (*rhs).size() ) {
// Hand both operands to dvecdvecinner(); overload resolution picks the scalar
// or the SIMD kernel.
274 return dvecdvecinner( *lhs, *rhs );
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasSIMDAdd type trait.
Header file for the HasSIMDMult type trait.
Header file for the IsPadded type trait.
Header file for the IsSIMDCombinable type trait.
Deactivation of problematic macros.
Header file for the multiplication trait.
Header file for the prevMultiple shim.
Header file for the RemoveReference type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.
Definition: DenseVector.h:77
Header file for the DenseVector base class.
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2156
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.
Definition: Exception.h:235
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the exception macros of the math module.
System settings for performance optimizations.
Header file for basic type definitions.