// Include guard of this header, immediately followed by the start of the helper template.
// NOTE(review): the leading numbers on these lines are line numbers of the original file that
// were carried into this listing; several original lines (e.g. the second template parameter
// VT2 used below, and the braces of the struct) are not present in this view.
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 74 template<
typename VT1
// Compile-time helper for the dense vector/dense vector inner product: its 'value' member is
// the condition the two dvecdvecinner() overloads use (via DisableIf_t/EnableIf_t) to select
// between the scalar kernel and the SIMD kernel.
76 struct DVecDVecInnerExprHelper
// Composite type of the left-hand side operand with any reference qualifier removed.
80 using CT1 = RemoveReference_t< CompositeType_t<VT1> >;
// Composite type of the right-hand side operand with any reference qualifier removed.
83 using CT2 = RemoveReference_t< CompositeType_t<VT2> >;
// 'value' is the conjunction of the conditions listed below; all of them must hold.
87 static constexpr
bool value =
88 ( useOptimizedKernels &&
// NOTE(review): original lines 89-90 are missing from this listing, so further conditions
// may belong between 'useOptimizedKernels' and the trait checks below -- confirm against the
// full header.
91 IsSIMDCombinable_v< ElementType_t<CT1>, ElementType_t<CT2> > &&
// NOTE(review): the next two checks pair the element type of CT1 with itself instead of with
// the element type of CT2 -- confirm that this is intended.
92 HasSIMDAdd_v< ElementType_t<CT1>, ElementType_t<CT1> > &&
93 HasSIMDMult_v< ElementType_t<CT1>, ElementType_t<CT1> > );
// Scalar (non-SIMD) kernel of the inner product of two dense vectors.
//
// This overload takes part in overload resolution only when
// DVecDVecInnerExprHelper<VT1,VT2>::value is false (DisableIf_t). It returns a value of type
// 'const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> >'.
//
// lhs: left-hand side operand, a DenseVector<VT1,true>.
// rhs: right-hand side operand, a DenseVector<VT2,false>.
// NOTE(review): the boolean template argument is presumably the transpose flag (row vector
// times column vector) -- confirm against the DenseVector declaration.
// NOTE(review): the second template parameter (VT2), the braces, the declarations of 'left',
// 'right' and 'i', and the final return statement are not present in this listing.
122 template<
typename VT1
124 inline auto dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
125 -> DisableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
126 ,
const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
// Local shorthands for the composite types, element types and the result type.
128 using CT1 = CompositeType_t<VT1>;
129 using CT2 = CompositeType_t<VT2>;
130 using ET1 = ElementType_t<VT1>;
131 using ET2 = ElementType_t<VT2>;
132 using MultType = MultTrait_t<ET1,ET2>;
// Empty operand: return a default-constructed result instead of touching element 0 below.
136 if( (~lhs).
size() == 0UL )
return MultType();
// Seed the accumulator with the product of the first elements.
// NOTE(review): 'i' is declared on a line missing from this listing; since element 0 is
// consumed here it presumably starts at 1 -- confirm.
141 MultType sp( left[0UL] * right[0UL] );
// Main loop: accumulate four element products per iteration.
144 for( ; (i+4UL) <= left.size(); i+=4UL ) {
145 sp += left[i ] * right[i ] +
146 left[i+1UL] * right[i+1UL] +
147 left[i+2UL] * right[i+2UL] +
148 left[i+3UL] * right[i+3UL];
// Next loop: accumulate two element products per iteration for what the loop above left over.
150 for( ; (i+2UL) <= left.size(); i+=2UL ) {
151 sp += left[i ] * right[i ] +
152 left[i+1UL] * right[i+1UL];
// Final loop: accumulate any remaining elements one at a time.
154 for( ; i<left.size(); ++i ) {
155 sp += left[i] * right[i];
// SIMD kernel of the inner product of two dense vectors.
//
// This overload takes part in overload resolution only when
// DVecDVecInnerExprHelper<VT1,VT2>::value is true (EnableIf_t). It returns a value of type
// 'const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> >'.
//
// lhs: left-hand side operand, a DenseVector<VT1,true>.
// rhs: right-hand side operand, a DenseVector<VT2,false>.
// NOTE(review): the second template parameter (VT2), the braces, the definitions of
// 'SIMDSIZE', 'left', 'right' and 'i', and the final return statement are not present in
// this listing.
178 template<
typename VT1
180 inline auto dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
181 -> EnableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
182 ,
const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
// Local shorthands: composite types, their reference-free forms, element types, result type.
184 using CT1 = CompositeType_t<VT1>;
185 using CT2 = CompositeType_t<VT2>;
186 using XT1 = RemoveReference_t<CT1>;
187 using XT2 = RemoveReference_t<CT2>;
188 using ET1 = ElementType_t<VT1>;
189 using ET2 = ElementType_t<VT2>;
190 using MultType = MultTrait_t<ET1,ET2>;
// Empty operand: return a default-constructed result.
194 if( (~lhs).
size() == 0UL )
return MultType();
// A scalar remainder loop is required unless padding is enabled and both operands are padded.
200 constexpr
bool remainder( !usePadding || !IsPadded_v<XT1> || !IsPadded_v<XT2> );
// Number of elements, taken from the left operand.
202 const size_t N( left.size() );
// End index of the SIMD loops: with a remainder loop, N is masked with size_t(-SIMDSIZE);
// otherwise the SIMD loops run up to N.
// NOTE(review): the mask rounds N down to a multiple of SIMDSIZE only if SIMDSIZE is a power
// of two -- confirm.
204 const size_t ipos( ( remainder )?( N &
size_t(-SIMDSIZE) ):( N ) );
// Four independent SIMD accumulators.
// NOTE(review): they are added to below without a visible explicit initialization, so this
// presumably relies on the SIMD type's default constructor yielding zero -- confirm.
207 SIMDTrait_t<MultType> xmm1, xmm2, xmm3, xmm4;
// Main loop: four SIMD vectors per iteration, one per accumulator.
210 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
211 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
212 xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
213 xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
214 xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
// Next loop: two SIMD vectors per iteration.
216 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
217 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
218 xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
// Final SIMD loop: one SIMD vector per iteration.
220 for( ; i<ipos; i+=SIMDSIZE ) {
221 xmm1 = xmm1 + ( left.load(i) * right.load(i) );
// Combine the four accumulators and pass the result through sum() to obtain the MultType
// accumulator 'sp'.
224 MultType sp(
sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
// Scalar tail: executed only if 'remainder' is true, for the elements from ipos up to N.
226 for( ; remainder && i<N; ++i ) {
227 sp += left[i] * right[i];
// Entry function that checks the operand sizes and forwards to dvecdvecinner().
// NOTE(review): the function name and parameter list (original lines 265-268) are not present
// in this listing; presumably this is the multiplication operator for a row and a column
// dense vector -- confirm against the full header.
262 template<
typename VT1
// decltype(auto) return type: the declared type of the returned dvecdvecinner() call is kept.
264 inline decltype(
auto)
// Size mismatch check.
// NOTE(review): the body of this branch is missing from the listing; presumably an exception
// is thrown here -- confirm.
269 if( (~lhs).
size() != (~rhs).
size() ) {
// Delegate to whichever dvecdvecinner() overload is enabled for the operand types.
273 return dvecdvecinner( ~lhs, ~rhs );
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2146
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the IsPadded type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.