35 #ifndef _BLAZE_MATH_SIMD_LOADU_H_ 36 #define _BLAZE_MATH_SIMD_LOADU_H_ 74 template<
typename T >
76 , If_< IsSigned<T>, SIMDint8, SIMDuint8 > >
77 loadu(
const T* address ) noexcept
80 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
82 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
100 template<
typename T >
103 loadu(
const complex<T>* address ) noexcept
108 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
109 #elif BLAZE_SSE2_MODE 110 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
136 template<
typename T >
142 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
143 #elif BLAZE_SSE2_MODE 144 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
162 template<
typename T >
165 loadu(
const complex<T>* address ) noexcept
170 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
171 #elif BLAZE_SSE2_MODE 172 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
198 template<
typename T >
204 __m512i v1 = _mm512_setzero_epi32();
205 v1 = _mm512_loadunpacklo_epi32( v1, address );
206 v1 = _mm512_loadunpackhi_epi32( v1, address+16UL );
208 #elif BLAZE_AVX2_MODE 209 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
210 #elif BLAZE_SSE2_MODE 211 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
229 template<
typename T >
232 loadu(
const complex<T>* address ) noexcept
237 __m512i v1 = _mm512_setzero_epi32();
238 v1 = _mm512_loadunpacklo_epi32( v1, address );
239 v1 = _mm512_loadunpackhi_epi32( v1, address+8UL );
241 #elif BLAZE_AVX2_MODE 242 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
243 #elif BLAZE_SSE2_MODE 244 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
270 template<
typename T >
276 __m512i v1 = _mm512_setzero_epi32();
277 v1 = _mm512_loadunpacklo_epi64( v1, address );
278 v1 = _mm512_loadunpackhi_epi64( v1, address+8UL );
280 #elif BLAZE_AVX2_MODE 281 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
282 #elif BLAZE_SSE2_MODE 283 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
301 template<
typename T >
304 loadu(
const complex<T>* address ) noexcept
309 __m512i v1 = _mm512_setzero_epi32();
310 v1 = _mm512_loadunpacklo_epi64( v1, address );
311 v1 = _mm512_loadunpackhi_epi64( v1, address+4UL );
313 #elif BLAZE_AVX2_MODE 314 return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
315 #elif BLAZE_SSE2_MODE 316 return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
318 return If_< IsSigned<T>, SIMDcint64,
SIMDcuint64 >( *address );
345 __m512 v1 = _mm512_setzero_ps();
346 v1 = _mm512_loadunpacklo_ps( v1, address );
347 v1 = _mm512_loadunpackhi_ps( v1, address+16UL );
350 return _mm256_loadu_ps( address );
352 return _mm_loadu_ps( address );
375 __m512 v1 = _mm512_setzero_ps();
376 v1 = _mm512_loadunpacklo_ps( v1, reinterpret_cast<const float*>( address ) );
377 v1 = _mm512_loadunpackhi_ps( v1, reinterpret_cast<const float*>( address+8UL ) );
380 return _mm256_loadu_ps( reinterpret_cast<const float*>( address ) );
382 return _mm_loadu_ps( reinterpret_cast<const float*>( address ) );
411 __m512d v1 = _mm512_setzero_pd();
412 v1 = _mm512_loadunpacklo_pd( v1, address );
413 v1 = _mm512_loadunpackhi_pd( v1, address+8UL );
416 return _mm256_loadu_pd( address );
417 #elif BLAZE_SSE2_MODE 418 return _mm_loadu_pd( address );
441 __m512d v1 = _mm512_setzero_pd();
442 v1 = _mm512_loadunpacklo_pd( v1, reinterpret_cast<const double*>( address ) );
443 v1 = _mm512_loadunpackhi_pd( v1, reinterpret_cast<const double*>( address+4UL ) );
446 return _mm256_loadu_pd( reinterpret_cast<const double*>( address ) );
447 #elif BLAZE_SSE2_MODE 448 return _mm_loadu_pd( reinterpret_cast<const double*>( address ) );
SIMD type for 16-bit signed integral data values.
SIMD type for 32-bit unsigned integral complex values.
SIMD type for 16-bit unsigned integral complex values.
SIMD type for 32-bit signed integral complex values.
Header file for the IsIntegral type trait.
Header file for the And class template.
SIMD type for 32-bit unsigned integral data values.
SIMD type for 64-bit double precision floating point data values.
BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral< T >, HasSize< T, 1UL > >, If_< IsSigned< T >, SIMDint8, SIMDuint8 > > loadu(const T *address) noexcept
Loads a vector of 1-byte integral values.
Definition: Loadu.h:77
SIMD type for 64-bit unsigned integral complex values.
SIMD type for 16-bit signed integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the If class template.
SIMD type for 16-bit unsigned integral data values.
SIMD type for 32-bit signed integral data values.
Header file for the EnableIf class template.
Header file for the basic SIMD types.
SIMD type for 8-bit signed integral complex values.
Header file for the HasSize type trait.
SIMD type for 32-bit single precision complex values.
SIMD type for 64-bit signed integral complex values.
Header file for the IsSigned type trait.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 64-bit unsigned integral data values.
SIMD type for 64-bit integral data values.
SIMD type for 8-bit unsigned integral complex values.
SIMD type for 64-bit double precision complex values.
System settings for the SSE mode.
Header file for the complex data type.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is created.
Definition: StaticAssert.h:112
System settings for the inline keywords.