// Include guard for this header, immediately followed by the DVecDVecInnerExprHelper trait.
// The trait exposes a compile-time boolean `value`; the two dvecdvecinner() overloads in this
// file use DVecDVecInnerExprHelper<VT1,VT2> inside their DisableIf_/EnableIf_ return types.
// NOTE(review): VT2 is referenced below but only VT1 is visible in the template parameter
// list of this listing; the second parameter appears to be missing here, confirm against the header.
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 73 template<
typename VT1
75 struct DVecDVecInnerExprHelper
// Composite type of VT1 with any reference qualifier stripped.
79 using CT1 = RemoveReference_< CompositeType_<VT1> >;
// Composite type of VT2 with any reference qualifier stripped.
82 using CT2 = RemoveReference_< CompositeType_<VT2> >;
// `value` is true only if all visible conditions hold:
//   - useOptimizedKernels is set,
//   - both composite types have the same element type,
//   - that element type supports SIMD addition and SIMD multiplication.
// The SIMD checks only query CT1's element type; the IsSame condition makes CT2's identical.
// NOTE(review): embedded line numbers jump from 86 to 89, so further conditions may exist
// in the original header that are not visible here.
86 enum :
bool { value = useOptimizedKernels &&
89 IsSame< ElementType_<CT1>, ElementType_<CT2> >::value &&
90 HasSIMDAdd< ElementType_<CT1>, ElementType_<CT1> >::value &&
91 HasSIMDMult< ElementType_<CT1>, ElementType_<CT1> >::value };
// Scalar kernel for the inner product of two dense vectors.
// The return type is wrapped in DisableIf_ on DVecDVecInnerExprHelper<VT1,VT2>, so this
// overload is presumably selected when the helper's `value` is false (no SIMD kernel possible).
// Parameters: lhs is a DenseVector<VT1,true>, rhs is a DenseVector<VT2,false>.
// Returns:    a const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > (the accumulated sum of products).
// NOTE(review): the declarations of `left`, `right` and `i`, the closing braces and the final
// return statement are not visible in this listing.
120 template<
typename VT1
122 inline DisableIf_< DVecDVecInnerExprHelper<VT1,VT2>
123 ,
const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
124 dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
126 using Lhs = CompositeType_<VT1>;
127 using Rhs = CompositeType_<VT2>;
128 using ET1 = ElementType_<VT1>;
129 using ET2 = ElementType_<VT2>;
// Element type of the result of multiplying an ET1 by an ET2.
130 using MultType = MultTrait_<ET1,ET2>;
// Empty operands: return a default-constructed result without touching any element.
134 if( (~lhs).
size() == 0UL )
return MultType();
// Seed the accumulator with the product of the first elements; this is safe because the
// empty case was handled above. Presumably `i` therefore starts at 1 (declaration not visible).
139 MultType sp( left[0UL] * right[0UL] );
// Main loop: accumulate four element products per iteration.
142 for( ; (i+4UL) <= left.size(); i+=4UL ) {
143 sp += left[i ] * right[i ] +
144 left[i+1UL] * right[i+1UL] +
145 left[i+2UL] * right[i+2UL] +
146 left[i+3UL] * right[i+3UL];
// Then two element products per iteration for what is left.
148 for( ; (i+2UL) <= left.size(); i+=2UL ) {
149 sp += left[i ] * right[i ] +
150 left[i+1UL] * right[i+1UL];
// Finally handle any single remaining element.
152 for( ; i<left.size(); ++i ) {
153 sp += left[i] * right[i];
// SIMD kernel for the inner product of two dense vectors.
// The return type is wrapped in EnableIf_ on DVecDVecInnerExprHelper<VT1,VT2>, so this
// overload is presumably selected when the helper's `value` is true.
// Parameters: lhs is a DenseVector<VT1,true>, rhs is a DenseVector<VT2,false>.
// Returns:    a const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > (the accumulated sum of products).
// NOTE(review): the declarations of `left`, `right`, `i` and `SIMDSIZE`, the closing braces,
// the header of the trailing scalar loop and the final return are not visible in this listing.
176 template<
typename VT1
178 inline EnableIf_< DVecDVecInnerExprHelper<VT1,VT2>
179 ,
const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
180 dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
182 using Lhs = CompositeType_<VT1>;
183 using Rhs = CompositeType_<VT2>;
184 using ET1 = ElementType_<VT1>;
185 using ET2 = ElementType_<VT2>;
// Element type of the result of multiplying an ET1 by an ET2.
186 using MultType = MultTrait_<ET1,ET2>;
// Empty operands: return a default-constructed result without loading anything.
192 if( (~lhs).
size() == 0UL )
return MultType();
// Cache the element count used as the bound of every loop below.
197 const size_t N( left.size() );
// Four independent SIMD accumulators, combined only once after the vectorized loops.
// NOTE(review): they are default-constructed here; presumably that zero-initializes them, confirm.
199 SIMDTrait_<MultType> xmm1, xmm2, xmm3, xmm4;
// Main loop: four SIMD loads per operand per iteration, one per accumulator.
202 for( ; (i+SIMDSIZE*4UL) <= N; i+=SIMDSIZE*4UL ) {
203 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
204 xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
205 xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
206 xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
// Then two SIMD loads per operand per iteration for what is left.
208 for( ; (i+SIMDSIZE*2UL) <= N; i+=SIMDSIZE*2UL ) {
209 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
210 xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
// Then one SIMD load per operand per iteration.
212 for( ; (i+SIMDSIZE) <= N; i+=SIMDSIZE ) {
213 xmm1 = xmm1 + ( left.load(i) * right.load(i) );
// Add the four accumulators and reduce the resulting SIMD vector to a single value with sum().
216 MultType sp(
sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
// Scalar accumulation of an element product; presumably the body of a remainder loop over
// the elements that did not fill a whole SIMD vector (loop header not visible).
219 sp += left[i] * right[i];
// Size-checked wrapper around dvecdvecinner(); the return type is deduced via decltype(auto).
// NOTE(review): the function name, its parameter list and the second template parameter are
// not visible in this listing.
254 template<
typename VT1
256 inline decltype(
auto)
// The two operands must have the same size. The body of this error branch is not visible
// here; presumably it reports the mismatch via BLAZE_THROW_INVALID_ARGUMENT, confirm.
261 if( (~lhs).
size() != (~rhs).
size() ) {
// Forward to dvecdvecinner(); its two overloads are distinguished by their
// EnableIf_/DisableIf_ return types on DVecDVecInnerExprHelper<VT1,VT2>.
265 return dvecdvecinner( ~lhs, ~rhs );
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
Header file for the IsSame and IsStrictlySame type traits.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the HasSIMDMult type trait.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.