DVecDVecInnerExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/Exception.h>
46 #include <blaze/math/SIMD.h>
52 #include <blaze/util/Assert.h>
53 #include <blaze/util/DisableIf.h>
54 #include <blaze/util/EnableIf.h>
56 #include <blaze/util/Types.h>
59 
60 
61 namespace blaze {
62 
63 //=================================================================================================
64 //
65 // CLASS DEFINITION
66 //
67 //=================================================================================================
68 
69 //*************************************************************************************************
74 template< typename VT1 // Type of the left-hand side dense vector
75  , typename VT2 > // Type of the right-hand side dense vector
76 struct DVecDVecInnerExprHelper
77 {
78  //**Type definitions****************************************************************************
80  using CT1 = RemoveReference_< CompositeType_<VT1> >;
81 
83  using CT2 = RemoveReference_< CompositeType_<VT2> >;
84  //**********************************************************************************************
85 
86  //**********************************************************************************************
87  enum : bool { value = useOptimizedKernels &&
88  CT1::simdEnabled &&
89  CT2::simdEnabled &&
90  IsSame< ElementType_<CT1>, ElementType_<CT2> >::value &&
91  HasSIMDAdd< ElementType_<CT1>, ElementType_<CT1> >::value &&
92  HasSIMDMult< ElementType_<CT1>, ElementType_<CT1> >::value };
93  //**********************************************************************************************
94 };
96 //*************************************************************************************************
97 
98 
99 
100 
101 //=================================================================================================
102 //
103 // GLOBAL BINARY ARITHMETIC OPERATORS
104 //
105 //=================================================================================================
106 
107 //*************************************************************************************************
121 template< typename VT1 // Type of the left-hand side dense vector
122  , typename VT2 > // Type of the right-hand side dense vector
123 inline DisableIf_< DVecDVecInnerExprHelper<VT1,VT2>
124  , const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
125  dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
126 {
127  using Lhs = CompositeType_<VT1>;
128  using Rhs = CompositeType_<VT2>;
129  using ET1 = ElementType_<VT1>;
130  using ET2 = ElementType_<VT2>;
131  using MultType = MultTrait_<ET1,ET2>;
132 
133  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
134 
135  if( (~lhs).size() == 0UL ) return MultType();
136 
137  Lhs left ( ~lhs );
138  Rhs right( ~rhs );
139 
140  MultType sp( left[0UL] * right[0UL] );
141  size_t i( 1UL );
142 
143  for( ; (i+4UL) <= left.size(); i+=4UL ) {
144  sp += left[i ] * right[i ] +
145  left[i+1UL] * right[i+1UL] +
146  left[i+2UL] * right[i+2UL] +
147  left[i+3UL] * right[i+3UL];
148  }
149  for( ; (i+2UL) <= left.size(); i+=2UL ) {
150  sp += left[i ] * right[i ] +
151  left[i+1UL] * right[i+1UL];
152  }
153  for( ; i<left.size(); ++i ) {
154  sp += left[i] * right[i];
155  }
156 
157  return sp;
158 }
160 //*************************************************************************************************
161 
162 
163 //*************************************************************************************************
177 template< typename VT1 // Type of the left-hand side dense vector
178  , typename VT2 > // Type of the right-hand side dense vector
179 inline EnableIf_< DVecDVecInnerExprHelper<VT1,VT2>
180  , const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
181  dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
182 {
183  using Lhs = CompositeType_<VT1>;
184  using Rhs = CompositeType_<VT2>;
185  using ET1 = ElementType_<VT1>;
186  using ET2 = ElementType_<VT2>;
187  using MultType = MultTrait_<ET1,ET2>;
188 
189  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
190 
191  enum : size_t { SIMDSIZE = SIMDTrait<MultType>::size };
192 
193  if( (~lhs).size() == 0UL ) return MultType();
194 
195  Lhs left ( ~lhs );
196  Rhs right( ~rhs );
197 
198  const size_t N( left.size() );
199 
200  constexpr bool remainder( !usePadding || !IsPadded<VT1>::value || !IsPadded<VT2>::value );
201 
202  const size_t ipos( ( remainder )?( N & size_t(-SIMDSIZE) ):( N ) );
203  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
204 
205  SIMDTrait_<MultType> xmm1, xmm2, xmm3, xmm4;
206  size_t i( 0UL );
207 
208  for( ; (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL ) {
209  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
210  xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
211  xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
212  xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
213  }
214  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL ) {
215  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
216  xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
217  }
218  for( ; i<ipos; i+=SIMDSIZE ) {
219  xmm1 = xmm1 + ( left.load(i) * right.load(i) );
220  }
221 
222  MultType sp( sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
223 
224  for( ; remainder && i<N; ++i ) {
225  sp += left[i] * right[i];
226  }
227 
228  return sp;
229 }
231 //*************************************************************************************************
232 
233 
234 //*************************************************************************************************
260 template< typename VT1 // Type of the left-hand side dense vector
261  , typename VT2 > // Type of the right-hand side dense vector
262 inline decltype(auto)
263  operator*( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
264 {
266 
267  if( (~lhs).size() != (~rhs).size() ) {
268  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
269  }
270 
271  return dvecdvecinner( ~lhs, ~rhs );
272 }
273 //*************************************************************************************************
274 
275 } // namespace blaze
276 
277 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
Header file for the IsSame and IsStrictlySame type traits.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:76
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the IsPadded type trait.
Header file for the HasSIMDMult type trait.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.