// Include guard for this header: the macro is tested and then defined so the
// contents are processed at most once per translation unit. The matching
// #endif is not visible in this extract.
// NOTE(review): the macro name begins with an underscore followed by an
// uppercase letter; identifiers of that form are reserved for the
// implementation in C++. Renaming would have to be done consistently wherever
// the macro is referenced.
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDVECMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDVECDVECMULTEXPR_H_
// Compile-time helper whose nested constant `value` is used below as the
// condition of DisableIf / EnableIf to choose between the two multiplication
// overloads. The template parameter list (T1, T2) and the braces of the struct
// are not visible in this extract.
57 struct TDVecDVecMultExprHelper
// CT1: T1's CompositeType with any reference qualifier stripped by RemoveReference.
61 typedef typename RemoveReference< typename T1::CompositeType >::Type CT1;
// CT2: T2's CompositeType with any reference qualifier stripped by RemoveReference.
64 typedef typename RemoveReference< typename T2::CompositeType >::Type CT2;
// `value` is the conjunction of the conditions listed below. In the lines that
// are visible these are: CT1 reports itself as vectorizable, both element
// types are the same type, and IntrinsicTrait reports addition (queried for
// CT1's element type) and multiplication (queried for CT2's element type).
// Because of the IsSame condition both IntrinsicTrait queries refer to the
// same element type whenever `value` is true.
// NOTE(review): the embedded line numbering jumps from 68 to 70, so one
// condition appears to be missing from this extract (possibly a matching
// vectorizable check on CT2) - confirm against the complete file.
68 enum { value = CT1::vectorizable &&
70 IsSame< typename CT1::ElementType, typename CT2::ElementType>::value &&
71 IntrinsicTrait< typename CT1::ElementType >::addition &&
72 IntrinsicTrait< typename CT2::ElementType >::multiplication };
// Scalar (non-vectorized) kernel computing the sum of element-wise products of
// two vectors `lhs` and `rhs`.
//
// The function name and parameter list are not visible in this extract; only
// the template header and the return type are.
//
// Selection: the return type is wrapped in DisableIf keyed on
//   TDVecDVecMultExprHelper<T1,T2>; the sibling overload further down uses
//   EnableIf with the same helper, so the two overloads are presumably
//   mutually exclusive (the definitions of DisableIf / EnableIf are not shown
//   here).
// Returns: a const value of MultTrait<T1::ElementType,T2::ElementType>::Type.
// Throws:  std::invalid_argument ("Vector sizes do not match") when the two
//          operands report different sizes.
113 template<
typename T1
115 inline typename DisableIf< TDVecDVecMultExprHelper<T1,T2>,
116 const typename MultTrait<typename T1::ElementType,typename T2::ElementType>::Type >::Type
// Reject operands of different length before touching any element.
121 if( (~lhs).size() != (~rhs).size() )
122 throw std::invalid_argument(
"Vector sizes do not match" );
// Local shorthands for the operands' composite and element types. The
// declarations of MultType, `left` and `right` used below are not visible in
// this extract (presumably MultType is built from ET1/ET2 and left/right are
// the operands bound as Lhs/Rhs - TODO confirm).
124 typedef typename T1::CompositeType Lhs;
125 typedef typename T2::CompositeType Rhs;
126 typedef typename T1::ElementType ET1;
127 typedef typename T2::ElementType ET2;
// Empty operands: return a value-initialized result. This guard is required
// because the code below unconditionally reads element 0.
130 if( (~lhs).size() == 0UL )
return MultType();
// Seed the accumulator with the product of the first elements rather than
// with a zero value, then accumulate the remaining products starting at
// index 1.
135 MultType sp( left[0UL] * right[0UL] );
137 for(
size_t i=1UL; i<left.size(); ++i )
138 sp += left[i] * right[i];
// Vectorized kernel computing the sum of element-wise products of two vectors
// `lhs` and `rhs` using the packed type IT::Type.
//
// The function name, parameter list, the declarations of MultType, IT, `left`,
// `right` and `sp`, the body of the reduction loop and the final return are
// not visible in this extract.
//
// Selection: the return type is wrapped in EnableIf keyed on
//   TDVecDVecMultExprHelper<T1,T2>, i.e. the counterpart of the DisableIf
//   overload above (the definition of EnableIf is not shown here).
// Returns: a const value of MultTrait<T1::ElementType,T2::ElementType>::Type.
// Throws:  std::invalid_argument ("Vector sizes do not match") when the two
//          operands report different sizes.
173 template<
typename T1
175 inline typename EnableIf< TDVecDVecMultExprHelper<T1,T2>,
176 const typename MultTrait<typename T1::ElementType,typename T2::ElementType>::Type >::Type
// Reject operands of different length before touching any element.
181 if( (~lhs).size() != (~rhs).size() )
182 throw std::invalid_argument(
"Vector sizes do not match" );
// Local shorthands for the operands' composite and element types.
184 typedef typename T1::CompositeType Lhs;
185 typedef typename T2::CompositeType Rhs;
186 typedef typename T1::ElementType ET1;
187 typedef typename T2::ElementType ET2;
// Empty operands: return a value-initialized result.
191 if( (~lhs).size() == 0UL )
return MultType();
// Four independent packed accumulators, one per unrolled lane of the main loop.
// NOTE(review): they are declared without initializers and are read on the
// first loop iteration (and in the sum passed to store() even when the loop
// does not execute), so the result relies on IT::Type's default constructor
// yielding an all-zero value - confirm in the definition of IT::Type.
196 typename IT::Type xmm1, xmm2, xmm3, xmm4;
// N is the element count; `end` is N rounded down to a multiple of
// IT::size*4, i.e. the number of leading elements the unrolled loop can cover.
199 const size_t N ( left.size() );
200 const size_t end( N - N % (IT::size*4UL) );
// Main loop: each iteration advances by IT::size*4 and performs four packed
// multiply-accumulate steps at offsets 0, IT::size, 2*IT::size and 3*IT::size
// via the operands' get() accessor.
202 for(
size_t i=0UL; i<end; i+=IT::size*4UL ) {
203 xmm1 = xmm1 + ( left.get(i ) * right.get(i ) );
204 xmm2 = xmm2 + ( left.get(i+IT::size ) * right.get(i+IT::size ) );
205 xmm3 = xmm3 + ( left.get(i+IT::size*2UL) * right.get(i+IT::size*2UL) );
206 xmm4 = xmm4 + ( left.get(i+IT::size*3UL) * right.get(i+IT::size*3UL) );
// Combine the four accumulators and write the packed sum into a temporary
// array of IT::size scalars.
// NOTE(review): `array` is a plain automatic array with no visible alignment
// specification; if store() performs an aligned store this may violate its
// precondition - confirm store()'s alignment requirements.
209 MultType array[IT::size];
210 store( array, xmm1 + xmm2 + xmm3 + xmm4 );
// Reduction over the IT::size entries of `array`; the loop body is not
// visible in this extract (presumably it adds array[i] to sp - TODO confirm).
212 for(
size_t i=0UL; i<IT::size; ++i )
// Remainder loop: the trailing elements [end, N) that the unrolled loop did
// not cover are accumulated one at a time with scalar element access.
214 for(
size_t i=end; i<N; ++i )
215 sp += left[i] * right[i];