Blaze  3.6
DVecDVecInnerExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/Exception.h>
46 #include <blaze/math/SIMD.h>
53 #include <blaze/util/Assert.h>
54 #include <blaze/util/DisableIf.h>
55 #include <blaze/util/EnableIf.h>
57 #include <blaze/util/Types.h>
59 
60 
61 namespace blaze {
62 
63 //=================================================================================================
64 //
65 // CLASS DEFINITION
66 //
67 //=================================================================================================
68 
69 //*************************************************************************************************
74 template< typename VT1 // Type of the left-hand side dense vector
75  , typename VT2 > // Type of the right-hand side dense vector
76 struct DVecDVecInnerExprHelper
77 {
78  //**Type definitions****************************************************************************
80  using CT1 = RemoveReference_t< CompositeType_t<VT1> >;
81 
83  using CT2 = RemoveReference_t< CompositeType_t<VT2> >;
84  //**********************************************************************************************
85 
86  //**********************************************************************************************
87  static constexpr bool value =
88  ( useOptimizedKernels &&
89  CT1::simdEnabled &&
90  CT2::simdEnabled &&
91  IsSIMDCombinable_v< ElementType_t<CT1>, ElementType_t<CT2> > &&
92  HasSIMDAdd_v< ElementType_t<CT1>, ElementType_t<CT1> > &&
93  HasSIMDMult_v< ElementType_t<CT1>, ElementType_t<CT1> > );
94  //**********************************************************************************************
95 };
97 //*************************************************************************************************
98 
99 
100 
101 
102 //=================================================================================================
103 //
104 // GLOBAL BINARY ARITHMETIC OPERATORS
105 //
106 //=================================================================================================
107 
108 //*************************************************************************************************
122 template< typename VT1 // Type of the left-hand side dense vector
123  , typename VT2 > // Type of the right-hand side dense vector
124 inline auto dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
125  -> DisableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
126  , const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
127 {
128  using CT1 = CompositeType_t<VT1>;
129  using CT2 = CompositeType_t<VT2>;
130  using ET1 = ElementType_t<VT1>;
131  using ET2 = ElementType_t<VT2>;
132  using MultType = MultTrait_t<ET1,ET2>;
133 
134  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
135 
136  if( (~lhs).size() == 0UL ) return MultType();
137 
138  CT1 left ( ~lhs );
139  CT2 right( ~rhs );
140 
141  MultType sp( left[0UL] * right[0UL] );
142  size_t i( 1UL );
143 
144  for( ; (i+4UL) <= left.size(); i+=4UL ) {
145  sp += left[i ] * right[i ] +
146  left[i+1UL] * right[i+1UL] +
147  left[i+2UL] * right[i+2UL] +
148  left[i+3UL] * right[i+3UL];
149  }
150  for( ; (i+2UL) <= left.size(); i+=2UL ) {
151  sp += left[i ] * right[i ] +
152  left[i+1UL] * right[i+1UL];
153  }
154  for( ; i<left.size(); ++i ) {
155  sp += left[i] * right[i];
156  }
157 
158  return sp;
159 }
161 //*************************************************************************************************
162 
163 
164 //*************************************************************************************************
178 template< typename VT1 // Type of the left-hand side dense vector
179  , typename VT2 > // Type of the right-hand side dense vector
180 inline auto dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
181  -> EnableIf_t< DVecDVecInnerExprHelper<VT1,VT2>::value
182  , const MultTrait_t< ElementType_t<VT1>, ElementType_t<VT2> > >
183 {
184  using CT1 = CompositeType_t<VT1>;
185  using CT2 = CompositeType_t<VT2>;
186  using XT1 = RemoveReference_t<CT1>;
187  using XT2 = RemoveReference_t<CT2>;
188  using ET1 = ElementType_t<VT1>;
189  using ET2 = ElementType_t<VT2>;
190  using MultType = MultTrait_t<ET1,ET2>;
191 
192  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
193 
194  if( (~lhs).size() == 0UL ) return MultType();
195 
196  CT1 left ( ~lhs );
197  CT2 right( ~rhs );
198 
199  constexpr size_t SIMDSIZE = SIMDTrait<MultType>::size;
200  constexpr bool remainder( !usePadding || !IsPadded_v<XT1> || !IsPadded_v<XT2> );
201 
202  const size_t N( left.size() );
203 
204  const size_t ipos( ( remainder )?( N & size_t(-SIMDSIZE) ):( N ) );
205  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
206 
207  SIMDTrait_t<MultType> xmm1, xmm2, xmm3, xmm4;
208  size_t i( 0UL );
209 
210  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
211  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
212  xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
213  xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
214  xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
215  }
216  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
217  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
218  xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
219  }
220  for( ; i<ipos; i+=SIMDSIZE ) {
221  xmm1 = xmm1 + ( left.load(i) * right.load(i) );
222  }
223 
224  MultType sp( sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
225 
226  for( ; remainder && i<N; ++i ) {
227  sp += left[i] * right[i];
228  }
229 
230  return sp;
231 }
233 //*************************************************************************************************
234 
235 
236 //*************************************************************************************************
262 template< typename VT1 // Type of the left-hand side dense vector
263  , typename VT2 > // Type of the right-hand side dense vector
264 inline decltype(auto)
265  operator*( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
266 {
268 
269  if( (~lhs).size() != (~rhs).size() ) {
270  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
271  }
272 
273  return dvecdvecinner( ~lhs, ~rhs );
274 }
275 //*************************************************************************************************
276 
277 } // namespace blaze
278 
279 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
decltype(auto) sum(const DenseMatrix< MT, SO > &dm)
Reduces the given dense matrix by means of addition.
Definition: DMatReduceExpr.h:2147
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the IsPadded type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
constexpr size_t size(const Matrix< MT, SO > &matrix) noexcept
Returns the total number of elements of the matrix.
Definition: Matrix.h:530
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
Header file for the function trace functionality.