TDVecDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/Exception.h>
46 #include <blaze/math/SIMD.h>
51 #include <blaze/util/Assert.h>
52 #include <blaze/util/DisableIf.h>
53 #include <blaze/util/EnableIf.h>
55 #include <blaze/util/Types.h>
58 
59 
60 namespace blaze {
61 
62 //=================================================================================================
63 //
64 // CLASS DEFINITION
65 //
66 //=================================================================================================
67 
68 //*************************************************************************************************
73 template< typename T1 // Type of the left-hand side dense vector
74  , typename T2 > // Type of the right-hand side dense vector
75 struct TDVecDVecMultExprHelper
76 {
77  //**Type definitions****************************************************************************
79  typedef RemoveReference_< CompositeType_<T1> > CT1;
80 
82  typedef RemoveReference_< CompositeType_<T2> > CT2;
83  //**********************************************************************************************
84 
85  //**********************************************************************************************
86  enum : bool { value = useOptimizedKernels &&
87  CT1::simdEnabled &&
88  CT2::simdEnabled &&
89  IsSame< ElementType_<CT1>, ElementType_<CT2> >::value &&
90  HasSIMDAdd< ElementType_<CT1>, ElementType_<CT1> >::value &&
91  HasSIMDMult< ElementType_<CT1>, ElementType_<CT1> >::value };
92  //**********************************************************************************************
93 };
95 //*************************************************************************************************
96 
97 
98 
99 
100 //=================================================================================================
101 //
102 // GLOBAL BINARY ARITHMETIC OPERATORS
103 //
104 //=================================================================================================
105 
106 //*************************************************************************************************
// Scalar (non-vectorized) kernel for the scalar product of two dense vectors.
//
// The return type is wrapped in DisableIf_< TDVecDVecMultExprHelper<T1,T2>, ... >, so this
// overload is presumably the one chosen when the helper's `value` is false (confirm against
// the DisableIf documentation).
//
// Result:  the accumulated sum of left[i] * right[i] over all elements, of type
//          MultTrait_< ElementType_<T1>, ElementType_<T2> >; a default-constructed value of
//          that type if the vectors are empty.
// Errors:  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" ) if the operand sizes differ.
//
// NOTE(review): the declarator line that names the function and introduces `lhs` and `rhs`
// (listing line 136) and listing line 138 are absent from this rendering -- restore them
// from the real header before compiling.
132 template< typename T1 // Type of the left-hand side dense vector
133  , typename T2 > // Type of the right-hand side dense vector
134 inline DisableIf_< TDVecDVecMultExprHelper<T1,T2>
135  , const MultTrait_< ElementType_<T1>, ElementType_<T2> > >
137 {
139 
// Reject operands of different length before touching any element.
140  if( (~lhs).size() != (~rhs).size() ) {
141  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
142  }
143 
144  typedef CompositeType_<T1> Lhs;
145  typedef CompositeType_<T2> Rhs;
146  typedef ElementType_<T1> ET1;
147  typedef ElementType_<T2> ET2;
148  typedef MultTrait_<ET1,ET2> MultType;
149 
// Empty operands: return a default-constructed result. This guard also protects the
// unconditional access to element 0 further down.
150  if( (~lhs).size() == 0UL ) return MultType();
151 
// Bind both operands through their composite types.
152  Lhs left ( ~lhs );
153  Rhs right( ~rhs );
154 
// Seed the accumulator with the product of the first elements, so the loops start at index 1.
155  MultType sp( left[0UL] * right[0UL] );
156  size_t i( 1UL );
157 
// Main loop, manually unrolled by four.
158  for( ; (i+4UL) <= left.size(); i+=4UL ) {
159  sp += left[i ] * right[i ] +
160  left[i+1UL] * right[i+1UL] +
161  left[i+2UL] * right[i+2UL] +
162  left[i+3UL] * right[i+3UL];
163  }
// Unrolled-by-two pass over what the four-way loop left behind.
164  for( ; (i+2UL) <= left.size(); i+=2UL ) {
165  sp += left[i ] * right[i ] +
166  left[i+1UL] * right[i+1UL];
167  }
// Remainder: at most one element can still be pending here.
168  for( ; i<left.size(); ++i ) {
169  sp += left[i] * right[i];
170  }
171 
172  return sp;
173 }
174 //*************************************************************************************************
175 
176 
177 //*************************************************************************************************
// SIMD kernel for the scalar product of two dense vectors.
//
// The return type is wrapped in EnableIf_< TDVecDVecMultExprHelper<T1,T2>, ... >, so this
// overload is presumably the one chosen when the helper's `value` is true (confirm against
// the EnableIf documentation).
//
// Result:  the accumulated sum of left[i] * right[i] over all elements, of type
//          MultTrait_< ElementType_<T1>, ElementType_<T2> >; a default-constructed value of
//          that type if the vectors are empty.
// Errors:  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" ) if the operand sizes differ.
//
// NOTE(review): the declarator line that names the function and introduces `lhs` and `rhs`
// (listing line 209) and listing line 211 are absent from this rendering -- restore them
// from the real header before compiling.
205 template< typename T1 // Type of the left-hand side dense vector
206  , typename T2 > // Type of the right-hand side dense vector
207 inline EnableIf_< TDVecDVecMultExprHelper<T1,T2>
208  , const MultTrait_< ElementType_<T1>, ElementType_<T2> > >
210 {
212 
// Reject operands of different length before touching any element.
213  if( (~lhs).size() != (~rhs).size() ) {
214  BLAZE_THROW_INVALID_ARGUMENT( "Vector sizes do not match" );
215  }
216 
217  typedef CompositeType_<T1> Lhs;
218  typedef CompositeType_<T2> Rhs;
219  typedef ElementType_<T1> ET1;
220  typedef ElementType_<T2> ET2;
221  typedef MultTrait_<ET1,ET2> MultType;
222 
// Stride of one SIMD step, taken from SIMDTrait<MultType>::size
// (presumably the number of MultType elements per SIMD vector -- confirm in SIMDTrait).
223  enum : size_t { SIMDSIZE = SIMDTrait<MultType>::size };
224 
// Empty operands: return a default-constructed result.
225  if( (~lhs).size() == 0UL ) return MultType();
226 
// Bind both operands through their composite types.
227  Lhs left ( ~lhs );
228  Rhs right( ~rhs );
229 
230  const size_t N( left.size() );
231 
// Four independent SIMD accumulators.
// NOTE(review): they are only default-constructed before being read in `xmmN + ...`; this
// relies on SIMDTrait_<MultType> default-initializing to zero -- confirm in the SIMD types.
232  SIMDTrait_<MultType> xmm1, xmm2, xmm3, xmm4;
233  size_t i( 0UL );
234 
// Main loop: four SIMD vectors per iteration, one per accumulator.
235  for( ; (i+SIMDSIZE*4UL) <= N; i+=SIMDSIZE*4UL ) {
236  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
237  xmm2 = xmm2 + ( left.load(i+SIMDSIZE ) * right.load(i+SIMDSIZE ) );
238  xmm3 = xmm3 + ( left.load(i+SIMDSIZE*2UL) * right.load(i+SIMDSIZE*2UL) );
239  xmm4 = xmm4 + ( left.load(i+SIMDSIZE*3UL) * right.load(i+SIMDSIZE*3UL) );
240  }
// Two SIMD vectors per iteration for what the four-way loop left behind.
241  for( ; (i+SIMDSIZE*2UL) <= N; i+=SIMDSIZE*2UL ) {
242  xmm1 = xmm1 + ( left.load(i ) * right.load(i ) );
243  xmm2 = xmm2 + ( left.load(i+SIMDSIZE) * right.load(i+SIMDSIZE) );
244  }
// Single remaining full SIMD vector, if any.
245  for( ; (i+SIMDSIZE) <= N; i+=SIMDSIZE ) {
246  xmm1 = xmm1 + ( left.load(i) * right.load(i) );
247  }
248 
// Combine the four accumulators and reduce them to one scalar with sum().
249  MultType sp( sum( xmm1 + xmm2 + xmm3 + xmm4 ) );
250 
// Scalar tail: the fewer-than-SIMDSIZE elements that did not fill a SIMD vector.
251  for( ; i<N; ++i ) {
252  sp += left[i] * right[i];
253  }
254 
255  return sp;
256 }
257 //*************************************************************************************************
258 
259 } // namespace blaze
260 
261 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
constexpr bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:7800
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE const complex< int8_t > sum(const SIMDcint8 &a) noexcept
Returns the sum of all elements in the 8-bit integral complex SIMD vector.
Definition: Reduction.h:63
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
Header file for the IsSame and IsStrictlySame type traits.
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:315
Header file for the DenseVector base class.
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template. The MultTrait_ alias declaration provide...
Definition: MultTrait.h:245
System settings for performance optimizations.
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions. The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Header file for the DisableIf class template.
Header file for the multiplication trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the HasSIMDAdd type trait.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions. The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
Header file for the exception macros of the math module.
Header file for the EnableIf class template.
Header file for the HasSIMDMult type trait.
Header file for run time assertion macros.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
Header file for the RemoveReference type trait.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the FunctionTrace class.