35 #ifndef _BLAZE_MATH_SIMD_LOADU_H_
36 #define _BLAZE_MATH_SIMD_LOADU_H_
74 template<
typename T >
76 , If_< IsSigned<T>, SIMDint8, SIMDuint8 > >
77 loadu(
const T* address ) noexcept
80 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
82 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
100 template<
typename T >
102 , If_< IsSigned<T>, SIMDcint8, SIMDcuint8 > >
103 loadu(
const complex<T>* address ) noexcept
108 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
109 #elif BLAZE_SSE2_MODE
110 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
136 template<
typename T >
138 , If_< IsSigned<T>, SIMDint16, SIMDuint16 > >
142 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
143 #elif BLAZE_SSE2_MODE
144 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
162 template<
typename T >
164 , If_< IsSigned<T>, SIMDcint16, SIMDcuint16 > >
165 loadu(
const complex<T>* address ) noexcept
170 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
171 #elif BLAZE_SSE2_MODE
172 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
198 template<
typename T >
200 , If_< IsSigned<T>, SIMDint32, SIMDuint32 > >
204 __m512i v1 = _mm512_setzero_epi32();
205 v1 = _mm512_loadunpacklo_epi32( v1, address );
206 v1 = _mm512_loadunpackhi_epi32( v1, address+16UL );
208 #elif BLAZE_AVX2_MODE
209 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
210 #elif BLAZE_SSE2_MODE
211 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
229 template<
typename T >
231 , If_< IsSigned<T>, SIMDcint32, SIMDcuint32 > >
232 loadu(
const complex<T>* address ) noexcept
237 __m512i v1 = _mm512_setzero_epi32();
238 v1 = _mm512_loadunpacklo_epi32( v1, address );
239 v1 = _mm512_loadunpackhi_epi32( v1, address+8UL );
241 #elif BLAZE_AVX2_MODE
242 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
243 #elif BLAZE_SSE2_MODE
244 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
270 template<
typename T >
272 , If_< IsSigned<T>, SIMDint64, SIMDuint64 > >
276 __m512i v1 = _mm512_setzero_epi32();
277 v1 = _mm512_loadunpacklo_epi64( v1, address );
278 v1 = _mm512_loadunpackhi_epi64( v1, address+8UL );
280 #elif BLAZE_AVX2_MODE
281 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
282 #elif BLAZE_SSE2_MODE
283 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
301 template<
typename T >
303 , If_< IsSigned<T>, SIMDcint64, SIMDcuint64 > >
304 loadu(
const complex<T>* address ) noexcept
309 __m512i v1 = _mm512_setzero_epi32();
310 v1 = _mm512_loadunpacklo_epi64( v1, address );
311 v1 = _mm512_loadunpackhi_epi64( v1, address+4UL );
313 #elif BLAZE_AVX2_MODE
314 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
315 #elif BLAZE_SSE2_MODE
316 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
318 return If_< IsSigned<T>, SIMDcint64,
SIMDcuint64 >( *address );
345 __m512 v1 = _mm512_setzero_ps();
346 v1 = _mm512_loadunpacklo_ps( v1, address );
347 v1 = _mm512_loadunpackhi_ps( v1, address+16UL );
350 return _mm256_loadu_ps( address );
352 return _mm_loadu_ps( address );
375 __m512 v1 = _mm512_setzero_ps();
376 v1 = _mm512_loadunpacklo_ps( v1, reinterpret_cast<const float*>( address ) );
377 v1 = _mm512_loadunpackhi_ps( v1, reinterpret_cast<const float*>( address+8UL ) );
380 return _mm256_loadu_ps( reinterpret_cast<const float*>( address ) );
382 return _mm_loadu_ps( reinterpret_cast<const float*>( address ) );
411 __m512d v1 = _mm512_setzero_pd();
412 v1 = _mm512_loadunpacklo_pd( v1, address );
413 v1 = _mm512_loadunpackhi_pd( v1, address+8UL );
416 return _mm256_loadu_pd( address );
417 #elif BLAZE_SSE2_MODE
418 return _mm_loadu_pd( address );
441 __m512d v1 = _mm512_setzero_pd();
442 v1 = _mm512_loadunpacklo_pd( v1, reinterpret_cast<const double*>( address ) );
443 v1 = _mm512_loadunpackhi_pd( v1, reinterpret_cast<const double*>( address+4UL ) );
446 return _mm256_loadu_pd( reinterpret_cast<const double*>( address ) );
447 #elif BLAZE_SSE2_MODE
448 return _mm_loadu_pd( reinterpret_cast<const double*>( address ) );
Header file for the IsIntegral type trait.
Header file for the And class template.
SIMD type for 64-bit double precision floating point data values.
BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral< T >, HasSize< T, 1UL > >, If_< IsSigned< T >, SIMDint8, SIMDuint8 > > loadu(const T *address) noexcept
Loads a vector of 1-byte integral values.
Definition: Loadu.h:77
SIMD type for 64-bit unsigned integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the If class template.
Header file for the EnableIf class template.
Header file for the basic SIMD types.
Header file for the HasSize type trait.
SIMD type for 32-bit single precision complex values.
Header file for the IsSigned type trait.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 64-bit double precision complex values.
System settings for the SSE mode.
Header file for the complex data type.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is created.
Definition: StaticAssert.h:112
System settings for the inline keywords.