Blaze 3.9
DVecDVecInnerExpr.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <blaze/math/Aliases.h>
47#include <blaze/math/SIMD.h>
55#include <blaze/util/Assert.h>
56#include <blaze/util/EnableIf.h>
58#include <blaze/util/Types.h>
60
61
62namespace blaze {
63
64//=================================================================================================
65//
66// CLASS DEFINITION
67//
68//=================================================================================================
69
70//*************************************************************************************************
75template< typename VT1 // Type of the left-hand side dense vector
76 , typename VT2 > // Type of the right-hand side dense vector
77struct DVecDVecInnerExprHelper
78{
79 //**Type definitions****************************************************************************
81 using CT1 = RemoveReference_t< CompositeType_t<VT1> >;
82
84 using CT2 = RemoveReference_t< CompositeType_t<VT2> >;
85 //**********************************************************************************************
86
87 //**********************************************************************************************
88 static constexpr bool value =
89 ( useOptimizedKernels &&
90 CT1::simdEnabled &&
91 CT2::simdEnabled &&
92 IsSIMDCombinable_v< ElementType_t<CT1>, ElementType_t<CT2> > &&
93 HasSIMDAdd_v< ElementType_t<CT1>, ElementType_t<CT1> > &&
94 HasSIMDMult_v< ElementType_t<CT1>, ElementType_t<CT1> > );
95 //**********************************************************************************************
96};
98//*************************************************************************************************
99
100
101
102
103//=================================================================================================
104//
105// GLOBAL BINARY ARITHMETIC OPERATORS
106//
107//=================================================================================================
108
109//*************************************************************************************************
123template< typename VT1 // Type of the left-hand side dense vector
124 , typename VT2 > // Type of the right-hand side dense vector
125inline auto dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
126 -> DisableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
127 , const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
128{
129 using CT1 = CompositeType_t<VT1>;
130 using CT2 = CompositeType_t<VT2>;
131 using ET1 = ElementType_t<VT1>;
132 using ET2 = ElementType_t<VT2>;
133 using MultType = MultTrait_t<ET1,ET2>;
134
135 BLAZE_INTERNAL_ASSERT( (*lhs).size() == (*rhs).size(), "Invalid vector sizes" );
136
137 if( (*lhs).size() == 0UL ) return MultType();
138
139 CT1 left ( *lhs );
140 CT2 right( *rhs );
141
142 MultType sp( left[0UL] * right[0UL] );
143 size_t i( 1UL );
144
145 for( ; (i+4UL) <= left.size(); i+=4UL ) {
146 sp += left[i ] * right[i ] +
147 left[i+1UL] * right[i+1UL] +
148 left[i+2UL] * right[i+2UL] +
149 left[i+3UL] * right[i+3UL];
150 }
151 for( ; (i+2UL) <= left.size(); i+=2UL ) {
152 sp += left[i ] * right[i ] +
153 left[i+1UL] * right[i+1UL];
154 }
155 for( ; i<left.size(); ++i ) {
156 sp += left[i] * right[i];
157 }
158
159 return sp;
160}
162//*************************************************************************************************
163
164
165//*************************************************************************************************
179template< typename VT1 // Type of the left-hand side dense vector
180 , typename VT2 > // Type of the right-hand side dense vector
181inline auto dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
182 -> EnableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
183 , const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
184{
185 using CT1 = CompositeType_t<VT1>;
186 using CT2 = CompositeType_t<VT2>;
187 using XT1 = RemoveReference_t<CT1>;
188 using XT2 = RemoveReference_t<CT2>;
189 using ET1 = ElementType_t<VT1>;
190 using ET2 = ElementType_t<VT2>;
191 using MultType = MultTrait_t<ET1,ET2>;
192
193 BLAZE_INTERNAL_ASSERT( (*lhs).size() == (*rhs).size(), "Invalid vector sizes" );
194
195 if( (*lhs).size() == 0UL ) return MultType();
196
197 CT1 left ( *lhs );
198 CT2 right( *rhs );
199
200 constexpr size_t SIMDSIZE = SIMDTrait<MultType>::size;
201 constexpr bool remainder( !IsPadded_v<XT1> || !IsPadded_v<XT2> );
202
203 const size_t N( left.size() );
204
205 const size_t ipos( remainder ? prevMultiple( N, SIMDSIZE ): N );
206 BLAZE_INTERNAL_ASSERT( ipos <= N, "Invalid end calculation" );
207
208 SIMDTrait_t<MultType> xmm1, xmm2, xmm3, xmm4;
209 size_t i( 0UL );
210
211 for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
212 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
213 xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
214 xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
215 xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
216 }
217 for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
218 xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
219 xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
220 }
221 for( ; i<ipos; i+=SIMDSIZE ) {
222 xmm1 = xmm1 + ( left.load(i) * right.load(i) );
223 }
224
225 MultType sp( sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
226
227 for( ; remainder && i<N; ++i ) {
228 sp += left[i] * right[i];
229 }
230
231 return sp;
232}
234//*************************************************************************************************
235
236
237//*************************************************************************************************
263template< typename VT1 // Type of the left-hand side dense vector
264 , typename VT2 > // Type of the right-hand side dense vector
265inline decltype(auto)
266 operator*( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
267{
269
270 if( (*lhs).size() != (*rhs).size() ) {
271 BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
272 }
273
274 return dvecdvecinner( *lhs, *rhs );
275}
276//*************************************************************************************************
277
278} // namespace blaze
279
280#endif
Header file for auxiliary alias declarations.
Header file for run time assertion macros.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasSIMDAdd type trait.
Header file for the HasSIMDMult type trait.
Header file for the IsPadded type trait.
Header file for the IsSIMDCombinable type trait.
Deactivation of problematic macros.
Header file for the multiplication trait.
Header file for the prevMultiple shim.
Header file for the RemoveReference type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors.
Definition: DenseVector.h:77
Header file for the DenseVector base class.
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2156
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:676
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.
Definition: Exception.h:235
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
Header file for the exception macros of the math module.
System settings for performance optimizations.
Header file for basic type definitions.