// Compile-time helper for the dense vector/dense vector inner product: exposes a
// boolean 'value' that the two dvecdvecinner() overloads in this file test via
// DisableIf_/EnableIf_ to choose between them.
// NOTE(review): this text looks like an extracted source listing. The leading numbers
// are listing line numbers, and some original lines are not visible here (e.g. the
// second template parameter VT2, the struct braces, and listing lines 88-89).
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_ 74 template<
typename VT1
76 struct DVecDVecInnerExprHelper
// CT1/CT2: composite types of the two vector operands with any reference removed.
80 using CT1 = RemoveReference_< CompositeType_<VT1> >;
83 using CT2 = RemoveReference_< CompositeType_<VT2> >;
// 'value' requires all visible conditions to hold: optimized kernels are switched on,
// both composite types have the same element type, and SIMD addition and SIMD
// multiplication are available for that element type.
87 enum :
bool { value = useOptimizedKernels &&
90 IsSame< ElementType_<CT1>, ElementType_<CT2> >::value &&
// The SIMD traits are queried with CT1's element type on both sides; the IsSame check
// above already requires CT2's element type to be identical.
91 HasSIMDAdd< ElementType_<CT1>, ElementType_<CT1> >::value &&
92 HasSIMDMult< ElementType_<CT1>, ElementType_<CT1> >::value };
// Scalar (non-SIMD) kernel for the inner product of a DenseVector<VT1,true> and a
// DenseVector<VT2,false>. The return type is wrapped in DisableIf_ on
// DVecDVecInnerExprHelper<VT1,VT2>, so this overload is presumably the one selected
// when the helper's 'value' is false.
// Returns a const MultTrait_ of the two element types; a default-constructed MultType
// is returned when the left-hand side vector has size 0.
// NOTE(review): the declarations of 'left', 'right' and the index 'i', as well as the
// braces and the final return, are not visible in this listing.
121 template<
typename VT1
123 inline DisableIf_< DVecDVecInnerExprHelper<VT1,VT2>
124 ,
const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
125 dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
127 using Lhs = CompositeType_<VT1>;
128 using Rhs = CompositeType_<VT2>;
129 using ET1 = ElementType_<VT1>;
130 using ET2 = ElementType_<VT2>;
131 using MultType = MultTrait_<ET1,ET2>;
// Empty operand: nothing to accumulate, return a default-constructed result.
135 if( (~lhs).
size() == 0UL )
return MultType();
// The accumulator is seeded with the product of the first elements, which is why the
// empty case has to be handled before this point.
// presumably 'i' then starts at 1 (its initialization is not visible) - TODO confirm
140 MultType sp( left[0UL] * right[0UL] );
// Main loop: consumes four element pairs per iteration.
143 for( ; (i+4UL) <= left.size(); i+=4UL ) {
144 sp += left[i ] * right[i ] +
145 left[i+1UL] * right[i+1UL] +
146 left[i+2UL] * right[i+2UL] +
147 left[i+3UL] * right[i+3UL];
// Second loop: consumes two element pairs per iteration from what the loop above left.
149 for( ; (i+2UL) <= left.size(); i+=2UL ) {
150 sp += left[i ] * right[i ] +
151 left[i+1UL] * right[i+1UL];
// Final loop: handles any remaining single element pair.
153 for( ; i<left.size(); ++i ) {
154 sp += left[i] * right[i];
// SIMD kernel for the inner product of a DenseVector<VT1,true> and a
// DenseVector<VT2,false>. The return type is wrapped in EnableIf_ on
// DVecDVecInnerExprHelper<VT1,VT2>, so this overload is presumably the one selected
// when the helper's 'value' is true.
// Returns a const MultTrait_ of the two element types; a default-constructed MultType
// is returned when the left-hand side vector has size 0.
// NOTE(review): the definitions of SIMDSIZE, 'left', 'right' and the index 'i', as
// well as the braces and the final return, are not visible in this listing.
177 template<
typename VT1
179 inline EnableIf_< DVecDVecInnerExprHelper<VT1,VT2>
180 ,
const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
181 dvecdvecinner(
const DenseVector<VT1,true>& lhs,
const DenseVector<VT2,false>& rhs )
183 using Lhs = CompositeType_<VT1>;
184 using Rhs = CompositeType_<VT2>;
185 using ET1 = ElementType_<VT1>;
186 using ET2 = ElementType_<VT2>;
187 using MultType = MultTrait_<ET1,ET2>;
// Empty operand: return a default-constructed result without touching any element.
193 if( (~lhs).
size() == 0UL )
return MultType();
198 const size_t N( left.size() );
// 'remainder' is true unless padding is enabled and both vector types are padded; it
// gates the scalar clean-up loop at the end.
200 constexpr
bool remainder( !usePadding || !IsPadded<VT1>::value || !IsPadded<VT2>::value );
// End index of the SIMD loops. With a remainder, N is masked with -SIMDSIZE, which
// rounds N down to a multiple of SIMDSIZE provided SIMDSIZE is a power of two
// (assumption; its definition is not visible). Without a remainder the SIMD loops
// run up to N itself.
202 const size_t ipos( ( remainder )?( N &
size_t(-SIMDSIZE) ):( N ) );
// Four independent SIMD accumulators, combined after the loops.
// NOTE(review): no initializer is given here; presumably the SIMD type
// default-constructs to zero - confirm against the SIMD type definition.
205 SIMDTrait_<MultType> xmm1, xmm2, xmm3, xmm4;
// Main loop: four SIMD loads per operand per iteration, one per accumulator.
208 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
209 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
210 xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
211 xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
212 xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
// Second loop: two SIMD loads per operand per iteration.
214 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
215 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
216 xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
// Third loop: one SIMD load per operand per iteration until ipos is reached.
218 for( ; i<ipos; i+=SIMDSIZE ) {
219 xmm1 = xmm1 + ( left.load(i) * right.load(i) );
// Combine the four accumulators and reduce the result to a single MultType via sum().
222 MultType sp(
sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
// Scalar clean-up for the elements in [i, N); only runs when 'remainder' is true.
224 for( ; remainder && i<N; ++i ) {
225 sp += left[i] * right[i];
// Entry point that checks the operand sizes and then forwards to dvecdvecinner().
// NOTE(review): the function name/signature line and the body of the size-mismatch
// branch are not visible in this listing. Presumably this is the multiplication
// operator for a transpose dense vector and a dense vector, and the branch raises an
// invalid-argument error - confirm against the original header.
260 template<
typename VT1
262 inline decltype(
auto)
// Vectors of different size cannot form an inner product.
267 if( (~lhs).
size() != (~rhs).
size() ) {
// Sizes match: delegate to whichever dvecdvecinner() overload is enabled for VT1/VT2.
271 return dvecdvecinner( ~lhs, ~rhs );
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
Header file for the IsSame and IsStrictlySame type traits.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the IsPadded type trait.
Header file for the HasSIMDMult type trait.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.