#ifndef _BLAZE_MATH_SIMD_STOREU_H_
#define _BLAZE_MATH_SIMD_STOREU_H_
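
// Includes
// (reconstructed from this header's dependency list; the exact include paths are an
//  assumption based on the usual Blaze source layout)
#include <blaze/math/simd/BasicTypes.h>
#include <blaze/system/Inline.h>
#include <blaze/system/Vectorization.h>
#include <blaze/util/Complex.h>
#include <blaze/util/EnableIf.h>
#include <blaze/util/StaticAssert.h>
#include <blaze/util/typetraits/HasSize.h>
#include <blaze/util/typetraits/IsIntegral.h>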

// Unaligned store of a vector of 1-byte integral values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
   storeu( T1* address, const SIMDi8<T2>& value ) noexcept
{
#if BLAZE_AVX512BW_MODE
   _mm512_storeu_si512( address, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 1-byte integral complex values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
   storeu( complex<T1>* address, const SIMDci8<T2>& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );

#if BLAZE_AVX512BW_MODE
   _mm512_storeu_si512( address, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 2-byte integral values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
   storeu( T1* address, const SIMDi16<T2>& value ) noexcept
{
#if BLAZE_AVX512BW_MODE
   _mm512_storeu_si512( address, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 2-byte integral complex values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
   storeu( complex<T1>* address, const SIMDci16<T2>& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );

#if BLAZE_AVX512BW_MODE
   _mm512_storeu_si512( address, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 4-byte integral values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
   storeu( T1* address, const SIMDi32<T2>& value ) noexcept
{
#if BLAZE_AVX512F_MODE
   _mm512_mask_storeu_epi32( address, 0xFFFF, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_epi32( address     , (*value).value );
   _mm512_packstorehi_epi32( address+16UL, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}
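
// Usage sketch for the integral overloads (blaze::SIMDint32 and blaze::set() are assumed
// from the Blaze SIMD API; the exact number of stored elements depends on the SIMD width):
//
//    std::vector<int> v( 64UL, 0 );
//    blaze::SIMDint32 x( blaze::set( 7 ) );  // broadcast the value 7 into all SIMD lanes
//    blaze::storeu( v.data()+1UL, x );       // store to an arbitrarily aligned address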

// Unaligned store of a vector of 4-byte integral complex values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
   storeu( complex<T1>* address, const SIMDci32<T2>& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );

#if BLAZE_AVX512F_MODE
   _mm512_mask_storeu_epi32( address, 0xFFFF, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_epi32( address    , (*value).value );
   _mm512_packstorehi_epi32( address+8UL, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 8-byte integral values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
   storeu( T1* address, const SIMDi64<T2>& value ) noexcept
{
#if BLAZE_AVX512F_MODE
   _mm512_mask_storeu_epi64( address, 0xFF, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_epi64( address    , (*value).value );
   _mm512_packstorehi_epi64( address+8UL, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 8-byte integral complex values (the address need not be aligned).
template< typename T1    // Type of the integral value
        , typename T2 >  // Type of the SIMD data type
BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
   storeu( complex<T1>* address, const SIMDci64<T2>& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );

#if BLAZE_AVX512F_MODE
   _mm512_mask_storeu_epi64( address, 0xFF, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_epi64( address    , (*value).value );
   _mm512_packstorehi_epi64( address+4UL, (*value).value );
#elif BLAZE_AVX2_MODE
   _mm256_storeu_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
#else
   *address = (*value).value;
#endif
}

// Unaligned store of a vector of 'float' values (the address need not be aligned).
template< typename T >  // Type of the operand
BLAZE_ALWAYS_INLINE void storeu( float* address, const SIMDf32<T>& value ) noexcept
{
#if BLAZE_AVX512F_MODE
   _mm512_storeu_ps( address, (*value).value );
#elif BLAZE_MIC_MODE
   const SIMDfloat tmp( (*value).eval().value );
   _mm512_packstorelo_ps( address     , tmp.value );
   _mm512_packstorehi_ps( address+16UL, tmp.value );
#elif BLAZE_AVX_MODE
   _mm256_storeu_ps( address, (*value).eval().value );
#elif BLAZE_SSE_MODE
   _mm_storeu_ps( address, (*value).eval().value );
#else
   *address = (*value).eval().value;
#endif
}
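
// Usage sketch for the single precision overload (blaze::SIMDfloat and blaze::set() are
// assumed from the Blaze SIMD API):
//
//    float buffer[64] = {};
//    blaze::SIMDfloat x( blaze::set( 3.5F ) );  // broadcast 3.5F into all SIMD lanes
//    blaze::storeu( buffer+3, x );              // deliberately unaligned target address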

// Unaligned store of a vector of 'complex<float>' values (the address need not be aligned).
BLAZE_ALWAYS_INLINE void storeu( complex<float>* address, const SIMDcfloat& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<float> ) == 2UL*sizeof( float ) );

#if BLAZE_AVX512F_MODE
   _mm512_storeu_ps( address, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_ps( reinterpret_cast<float*>( address     ), value.value );
   _mm512_packstorehi_ps( reinterpret_cast<float*>( address+8UL ), value.value );
#elif BLAZE_AVX_MODE
   _mm256_storeu_ps( reinterpret_cast<float*>( address ), value.value );
#elif BLAZE_SSE_MODE
   _mm_storeu_ps( reinterpret_cast<float*>( address ), value.value );
#else
   *address = value.value;
#endif
}

// Unaligned store of a vector of 'double' values (the address need not be aligned).
template< typename T >  // Type of the operand
BLAZE_ALWAYS_INLINE void storeu( double* address, const SIMDf64<T>& value ) noexcept
{
#if BLAZE_AVX512F_MODE
   _mm512_storeu_pd( address, (*value).value );
#elif BLAZE_MIC_MODE
   const SIMDdouble tmp( (*value).eval().value );
   _mm512_packstorelo_pd( address    , tmp.value );
   _mm512_packstorehi_pd( address+8UL, tmp.value );
#elif BLAZE_AVX_MODE
   _mm256_storeu_pd( address, (*value).eval().value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_pd( address, (*value).eval().value );
#else
   *address = (*value).eval().value;
#endif
}

// Unaligned store of a vector of 'complex<double>' values (the address need not be aligned).
BLAZE_ALWAYS_INLINE void storeu( complex<double>* address, const SIMDcdouble& value ) noexcept
{
   BLAZE_STATIC_ASSERT( sizeof( complex<double> ) == 2UL*sizeof( double ) );

#if BLAZE_AVX512F_MODE
   _mm512_storeu_pd( address, (*value).value );
#elif BLAZE_MIC_MODE
   _mm512_packstorelo_pd( reinterpret_cast<double*>( address     ), value.value );
   _mm512_packstorehi_pd( reinterpret_cast<double*>( address+4UL ), value.value );
#elif BLAZE_AVX_MODE
   _mm256_storeu_pd( reinterpret_cast<double*>( address ), value.value );
#elif BLAZE_SSE2_MODE
   _mm_storeu_pd( reinterpret_cast<double*>( address ), value.value );
#else
   *address = value.value;
#endif
}
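
// Usage sketch for the complex double precision overload (std::vector, blaze::SIMDcdouble,
// and the matching blaze::loadu() overload for 'complex<double>' are assumed from the
// Blaze SIMD API):
//
//    std::vector< complex<double> > v( 64UL );
//    blaze::SIMDcdouble x( blaze::loadu( v.data() ) );  // unaligned load of complex values
//    blaze::storeu( v.data()+1UL, x );                  // unaligned store of the same register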