DVecDVecInnerExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DVECDVECINNEREXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/Exception.h>
46 #include <blaze/math/SIMD.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/DisableIf.h>
53 #include <blaze/util/EnableIf.h>
55 #include <blaze/util/Types.h>
58 
59 
60 namespace blaze {
61 
62 //=================================================================================================
63 //
64 // CLASS DEFINITION
65 //
66 //=================================================================================================
67 
68 //*************************************************************************************************
73 template< typename VT1 // Type of the left-hand side dense vector
74  , typename VT2 > // Type of the right-hand side dense vector
75 struct DVecDVecInnerExprHelper
76 {
77  //**Type definitions****************************************************************************
79  using CT1 = RemoveReference_< CompositeType_<VT1> >;
80 
82  using CT2 = RemoveReference_< CompositeType_<VT2> >;
83  //**********************************************************************************************
84 
85  //**********************************************************************************************
86  enum : bool { value = useOptimizedKernels &&
87  CT1::simdEnabled &&
88  CT2::simdEnabled &&
89  IsSame< ElementType_<CT1>, ElementType_<CT2> >::value &&
90  HasSIMDAdd< ElementType_<CT1>, ElementType_<CT1> >::value &&
91  HasSIMDMult< ElementType_<CT1>, ElementType_<CT1> >::value };
92  //**********************************************************************************************
93 };
95 //*************************************************************************************************
96 
97 
98 
99 
100 //=================================================================================================
101 //
102 // GLOBAL BINARY ARITHMETIC OPERATORS
103 //
104 //=================================================================================================
105 
106 //*************************************************************************************************
120 template< typename VT1 // Type of the left-hand side dense vector
121  , typename VT2 > // Type of the right-hand side dense vector
122 inline DisableIf_< DVecDVecInnerExprHelper<VT1,VT2>
123  , const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
124  dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
125 {
126  using Lhs = CompositeType_<VT1>;
127  using Rhs = CompositeType_<VT2>;
128  using ET1 = ElementType_<VT1>;
129  using ET2 = ElementType_<VT2>;
130  using MultType = MultTrait_<ET1,ET2>;
131 
132  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
133 
134  if( (~lhs).size() == 0UL ) return MultType();
135 
136  Lhs left ( ~lhs );
137  Rhs right( ~rhs );
138 
139  MultType sp( left[0UL] * right[0UL] );
140  size_t i( 1UL );
141 
142  for( ; (i+4UL) <= left.size(); i+=4UL ) {
143  sp += left[i ] * right[i ] +
144  left[i+1UL] * right[i+1UL] +
145  left[i+2UL] * right[i+2UL] +
146  left[i+3UL] * right[i+3UL];
147  }
148  for( ; (i+2UL) <= left.size(); i+=2UL ) {
149  sp += left[i ] * right[i ] +
150  left[i+1UL] * right[i+1UL];
151  }
152  for( ; i<left.size(); ++i ) {
153  sp += left[i] * right[i];
154  }
155 
156  return sp;
157 }
159 //*************************************************************************************************
160 
161 
162 //*************************************************************************************************
176 template< typename VT1 // Type of the left-hand side dense vector
177  , typename VT2 > // Type of the right-hand side dense vector
178 inline EnableIf_< DVecDVecInnerExprHelper<VT1,VT2>
179  , const MultTrait_< ElementType_<VT1>, ElementType_<VT2> > >
180  dvecdvecinner( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
181 {
182  using Lhs = CompositeType_<VT1>;
183  using Rhs = CompositeType_<VT2>;
184  using ET1 = ElementType_<VT1>;
185  using ET2 = ElementType_<VT2>;
186  using MultType = MultTrait_<ET1,ET2>;
187 
188  BLAZE_INTERNAL_ASSERT( (~lhs).size() == (~rhs).size(), "Invalid vector sizes" );
189 
190  enum : size_t { SIMDSIZE = SIMDTrait<MultType>::size };
191 
192  if( (~lhs).size() == 0UL ) return MultType();
193 
194  Lhs left ( ~lhs );
195  Rhs right( ~rhs );
196 
197  const size_t N( left.size() );
198 
199  SIMDTrait_<MultType> xmm1, xmm2, xmm3, xmm4;
200  size_t i( 0UL );
201 
202  for( ; (i+SIMDSIZE*4UL) <= N; i+=SIMDSIZE*4UL ) {
203  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
204  xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
205  xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
206  xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
207  }
208  for( ; (i+SIMDSIZE*2UL) <= N; i+=SIMDSIZE*2UL ) {
209  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
210  xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
211  }
212  for( ; (i+SIMDSIZE) <= N; i+=SIMDSIZE ) {
213  xmm1 = xmm1 + ( left.load(i) * right.load(i) );
214  }
215 
216  MultType sp( sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
217 
218  for( ; i<N; ++i ) {
219  sp += left[i] * right[i];
220  }
221 
222  return sp;
223 }
225 //*************************************************************************************************
226 
227 
228 //*************************************************************************************************
254 template< typename VT1 // Type of the left-hand side dense vector
255  , typename VT2 > // Type of the right-hand side dense vector
256 inline decltype(auto)
257  operator*( const DenseVector<VT1,true>& lhs, const DenseVector<VT2,false>& rhs )
258 {
260 
261  if( (~lhs).size() != (~rhs).size() ) {
262  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
263  }
264 
265  return dvecdvecinner( ~lhs, ~rhs );
266 }
267 //*************************************************************************************************
268 
269 } // namespace blaze
270 
271 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:265
Header file for the IsSame and IsStrictlySame type traits.
Header file for the DenseVector base class.
System settings for performance optimizations.
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the HasSIMDAdd type trait.
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the HasSIMDMult type trait.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
Header file for run time assertion macros.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for the RemoveReference type trait.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the function trace functionality.