Loadu.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_LOADU_H_
36 #define _BLAZE_MATH_SIMD_LOADU_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 #include <blaze/util/Complex.h>
47 #include <blaze/util/EnableIf.h>
48 #include <blaze/util/mpl/If.h>
53 
54 
55 namespace blaze {
56 
57 //=================================================================================================
58 //
59 // 8-BIT INTEGRAL SIMD TYPES
60 //
61 //=================================================================================================
62 
63 //*************************************************************************************************
73 template< typename T > // Type of the integral value
74 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,1UL>
75  , If_t< IsSigned_v<T>, SIMDint8, SIMDuint8 > >
76  loadu( const T* address ) noexcept
77 {
78 #if BLAZE_AVX512BW_MODE
79  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
80 #elif BLAZE_AVX2_MODE
81  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
82 #elif BLAZE_SSE2_MODE
83  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
84 #else
85  return *address;
86 #endif
87 }
88 //*************************************************************************************************
89 
90 
91 //*************************************************************************************************
101 template< typename T > // Type of the integral value
102 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,1UL>
103  , If_t< IsSigned_v<T>, SIMDcint8, SIMDcuint8 > >
104  loadu( const complex<T>* address ) noexcept
105 {
106  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
107 
108 #if BLAZE_AVX512BW_MODE
109  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
110 #elif BLAZE_AVX2_MODE
111  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
112 #elif BLAZE_SSE2_MODE
113  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
114 #else
115  return If_t< IsSigned_v<T>, SIMDcint8, SIMDcuint8 >( *address );
116 #endif
117 }
118 //*************************************************************************************************
119 
120 
121 
122 
123 //=================================================================================================
124 //
125 // 16-BIT INTEGRAL SIMD TYPES
126 //
127 //=================================================================================================
128 
129 //*************************************************************************************************
139 template< typename T > // Type of the integral value
140 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,2UL>
141  , If_t< IsSigned_v<T>, SIMDint16, SIMDuint16 > >
142  loadu( const T* address ) noexcept
143 {
144 #if BLAZE_AVX512BW_MODE
145  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
146 #elif BLAZE_AVX2_MODE
147  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
148 #elif BLAZE_SSE2_MODE
149  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
150 #else
151  return *address;
152 #endif
153 }
154 //*************************************************************************************************
155 
156 
157 //*************************************************************************************************
167 template< typename T > // Type of the integral value
168 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,2UL>
169  , If_t< IsSigned_v<T>, SIMDcint16, SIMDcuint16 > >
170  loadu( const complex<T>* address ) noexcept
171 {
172  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
173 
174 #if BLAZE_AVX512BW_MODE
175  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
176 #elif BLAZE_AVX2_MODE
177  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
178 #elif BLAZE_SSE2_MODE
179  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
180 #else
181  return If_t< IsSigned_v<T>, SIMDcint16, SIMDcuint16 >( *address );
182 #endif
183 }
184 //*************************************************************************************************
185 
186 
187 
188 
189 //=================================================================================================
190 //
191 // 32-BIT INTEGRAL SIMD TYPES
192 //
193 //=================================================================================================
194 
195 //*************************************************************************************************
205 template< typename T > // Type of the integral value
206 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,4UL>
207  , If_t< IsSigned_v<T>, SIMDint32, SIMDuint32 > >
208  loadu( const T* address ) noexcept
209 {
210 #if BLAZE_AVX512F_MODE
211  return _mm512_maskz_loadu_epi32( 0xFFFF, reinterpret_cast<const __m512i*>( address ) );
212 #elif BLAZE_MIC_MODE
213  __m512i v1 = _mm512_setzero_si512();
214  v1 = _mm512_loadunpacklo_epi32( v1, address );
215  v1 = _mm512_loadunpackhi_epi32( v1, address+16UL );
216  return v1;
217 #elif BLAZE_AVX2_MODE
218  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
219 #elif BLAZE_SSE2_MODE
220  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
221 #else
222  return If_t< IsSigned_v<T>, SIMDint32, SIMDuint32 >( *address );
223 #endif
224 }
225 //*************************************************************************************************
226 
227 
228 //*************************************************************************************************
238 template< typename T > // Type of the integral value
239 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,4UL>
240  , If_t< IsSigned_v<T>, SIMDcint32, SIMDcuint32 > >
241  loadu( const complex<T>* address ) noexcept
242 {
243  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
244 
245 #if BLAZE_AVX512F_MODE
246  return _mm512_maskz_loadu_epi32( 0xFFFF, address );
247 #elif BLAZE_MIC_MODE
248  __m512i v1 = _mm512_setzero_si512();
249  v1 = _mm512_loadunpacklo_epi32( v1, address );
250  v1 = _mm512_loadunpackhi_epi32( v1, address+8UL );
251  return v1;
252 #elif BLAZE_AVX2_MODE
253  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
254 #elif BLAZE_SSE2_MODE
255  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
256 #else
257  return *address;
258 #endif
259 }
260 //*************************************************************************************************
261 
262 
263 
264 
265 //=================================================================================================
266 //
267 // 64-BIT INTEGRAL SIMD TYPES
268 //
269 //=================================================================================================
270 
271 //*************************************************************************************************
281 template< typename T > // Type of the integral value
282 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,8UL>
283  , If_t< IsSigned_v<T>, SIMDint64, SIMDuint64 > >
284  loadu( const T* address ) noexcept
285 {
286 #if BLAZE_AVX512F_MODE
287  return _mm512_maskz_loadu_epi64( 0xFF, address );
288 #elif BLAZE_MIC_MODE
289  __m512i v1 = _mm512_setzero_si512();
290  v1 = _mm512_loadunpacklo_epi64( v1, address );
291  v1 = _mm512_loadunpackhi_epi64( v1, address+8UL );
292  return v1;
293 #elif BLAZE_AVX2_MODE
294  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
295 #elif BLAZE_SSE2_MODE
296  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
297 #else
298  return *address;
299 #endif
300 }
301 //*************************************************************************************************
302 
303 
304 //*************************************************************************************************
314 template< typename T > // Type of the integral value
315 BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v<T> && HasSize_v<T,8UL>
316  , If_t< IsSigned_v<T>, SIMDcint64, SIMDcuint64 > >
317  loadu( const complex<T>* address ) noexcept
318 {
319  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
320 
321 #if BLAZE_AVX512F_MODE
322  return _mm512_maskz_loadu_epi64( 0xFF, address );
323 #elif BLAZE_MIC_MODE
324  __m512i v1 = _mm512_setzero_si512();
325  v1 = _mm512_loadunpacklo_epi64( v1, address );
326  v1 = _mm512_loadunpackhi_epi64( v1, address+4UL );
327  return v1;
328 #elif BLAZE_AVX2_MODE
329  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
330 #elif BLAZE_SSE2_MODE
331  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
332 #else
333  return If_t< IsSigned_v<T>, SIMDcint64, SIMDcuint64 >( *address );
334 #endif
335 }
336 //*************************************************************************************************
337 
338 
339 
340 
341 //=================================================================================================
342 //
343 // 32-BIT FLOATING POINT SIMD TYPES
344 //
345 //=================================================================================================
346 
347 //*************************************************************************************************
357 BLAZE_ALWAYS_INLINE const SIMDfloat loadu( const float* address ) noexcept
358 {
359 #if BLAZE_AVX512F_MODE
360  return _mm512_loadu_ps( address );
361 #elif BLAZE_MIC_MODE
362  __m512 v1 = _mm512_setzero_ps();
363  v1 = _mm512_loadunpacklo_ps( v1, address );
364  v1 = _mm512_loadunpackhi_ps( v1, address+16UL );
365  return v1;
366 #elif BLAZE_AVX_MODE
367  return _mm256_loadu_ps( address );
368 #elif BLAZE_SSE_MODE
369  return _mm_loadu_ps( address );
370 #else
371  return *address;
372 #endif
373 }
374 //*************************************************************************************************
375 
376 
377 //*************************************************************************************************
387 BLAZE_ALWAYS_INLINE const SIMDcfloat loadu( const complex<float>* address ) noexcept
388 {
389  BLAZE_STATIC_ASSERT( sizeof( complex<float> ) == 2UL*sizeof( float ) );
390 
391 #if BLAZE_AVX512F_MODE
392  return _mm512_loadu_ps( address );
393 #elif BLAZE_MIC_MODE
394  __m512 v1 = _mm512_setzero_ps();
395  v1 = _mm512_loadunpacklo_ps( v1, reinterpret_cast<const float*>( address ) );
396  v1 = _mm512_loadunpackhi_ps( v1, reinterpret_cast<const float*>( address+8UL ) );
397  return v1;
398 #elif BLAZE_AVX_MODE
399  return _mm256_loadu_ps( reinterpret_cast<const float*>( address ) );
400 #elif BLAZE_SSE_MODE
401  return _mm_loadu_ps( reinterpret_cast<const float*>( address ) );
402 #else
403  return *address;
404 #endif
405 }
406 //*************************************************************************************************
407 
408 
409 
410 
411 //=================================================================================================
412 //
413 // 64-BIT FLOATING POINT SIMD TYPES
414 //
415 //=================================================================================================
416 
417 //*************************************************************************************************
427 BLAZE_ALWAYS_INLINE const SIMDdouble loadu( const double* address ) noexcept
428 {
429 #if BLAZE_AVX512F_MODE
430  return _mm512_loadu_pd( address );
431 #elif BLAZE_MIC_MODE
432  __m512d v1 = _mm512_setzero_pd();
433  v1 = _mm512_loadunpacklo_pd( v1, address );
434  v1 = _mm512_loadunpackhi_pd( v1, address+8UL );
435  return v1;
436 #elif BLAZE_AVX_MODE
437  return _mm256_loadu_pd( address );
438 #elif BLAZE_SSE2_MODE
439  return _mm_loadu_pd( address );
440 #else
441  return *address;
442 #endif
443 }
444 //*************************************************************************************************
445 
446 
447 //*************************************************************************************************
457 BLAZE_ALWAYS_INLINE const SIMDcdouble loadu( const complex<double>* address ) noexcept
458 {
459  BLAZE_STATIC_ASSERT( sizeof( complex<double> ) == 2UL*sizeof( double ) );
460 
461 #if BLAZE_AVX512F_MODE
462  return _mm512_loadu_pd( address );
463 #elif BLAZE_MIC_MODE
464  __m512d v1 = _mm512_setzero_pd();
465  v1 = _mm512_loadunpacklo_pd( v1, reinterpret_cast<const double*>( address ) );
466  v1 = _mm512_loadunpackhi_pd( v1, reinterpret_cast<const double*>( address+4UL ) );
467  return v1;
468 #elif BLAZE_AVX_MODE
469  return _mm256_loadu_pd( reinterpret_cast<const double*>( address ) );
470 #elif BLAZE_SSE2_MODE
471  return _mm_loadu_pd( reinterpret_cast<const double*>( address ) );
472 #else
473  return *address;
474 #endif
475 }
476 //*************************************************************************************************
477 
478 } // namespace blaze
479 
480 #endif
typename If< Condition, T1, T2 >::Type If_t
Auxiliary alias declaration for the If class template. The If_t alias declaration provides a convenien...
Definition: If.h:109
SIMD type for 16-bit unsigned integral complex values.
BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v< T > &&HasSize_v< T, 1UL >, If_t< IsSigned_v< T >, SIMDint8, SIMDuint8 > > loadu(const T *address) noexcept
Loads a vector of 1-byte integral values.
Definition: Loadu.h:76
Header file for the IsIntegral type trait.
SIMD type for 32-bit unsigned integral data values.
SIMD type for 64-bit double precision floating point data values.
SIMD type for 64-bit unsigned integral complex values.
SIMD type for 16-bit signed integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the If class template.
Compile time assertion.
SIMD type for 32-bit signed integral data values.
Header file for the EnableIf class template.
Header file for the basic SIMD types.
SIMD type for 8-bit signed integral complex values.
Header file for the HasSize type trait.
SIMD type for 32-bit single precision complex values.
SIMD type for 64-bit signed integral complex values.
Header file for the IsSigned type trait.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 8-bit unsigned integral complex values.
SIMD type for 64-bit double precision complex values.
System settings for the SSE mode.
Header file for the complex data type.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is cr...
Definition: StaticAssert.h:112
System settings for the inline keywords.