Loadu.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_LOADU_H_
36 #define _BLAZE_MATH_SIMD_LOADU_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 #include <blaze/util/Complex.h>
47 #include <blaze/util/EnableIf.h>
48 #include <blaze/util/mpl/And.h>
49 #include <blaze/util/mpl/If.h>
54 
55 
56 namespace blaze {
57 
58 //=================================================================================================
59 //
60 // 8-BIT INTEGRAL SIMD TYPES
61 //
62 //=================================================================================================
63 
64 //*************************************************************************************************
74 template< typename T > // Type of the integral value
75 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,1UL> >
76  , If_< IsSigned<T>, SIMDint8, SIMDuint8 > >
77  loadu( const T* address ) noexcept
78 {
79 #if BLAZE_AVX512BW_MODE
80  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
81 #elif BLAZE_AVX2_MODE
82  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
83 #elif BLAZE_SSE2_MODE
84  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
85 #else
86  return *address;
87 #endif
88 }
89 //*************************************************************************************************
90 
91 
92 //*************************************************************************************************
102 template< typename T > // Type of the integral value
103 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,1UL> >
104  , If_< IsSigned<T>, SIMDcint8, SIMDcuint8 > >
105  loadu( const complex<T>* address ) noexcept
106 {
107  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
108 
109 #if BLAZE_AVX512BW_MODE
110  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
111 #elif BLAZE_AVX2_MODE
112  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
113 #elif BLAZE_SSE2_MODE
114  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
115 #else
116  return *address;
117 #endif
118 }
119 //*************************************************************************************************
120 
121 
122 
123 
124 //=================================================================================================
125 //
126 // 16-BIT INTEGRAL SIMD TYPES
127 //
128 //=================================================================================================
129 
130 //*************************************************************************************************
140 template< typename T > // Type of the integral value
141 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,2UL> >
142  , If_< IsSigned<T>, SIMDint16, SIMDuint16 > >
143  loadu( const T* address ) noexcept
144 {
145 #if BLAZE_AVX512BW_MODE
146  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
147 #elif BLAZE_AVX2_MODE
148  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
149 #elif BLAZE_SSE2_MODE
150  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
151 #else
152  return *address;
153 #endif
154 }
155 //*************************************************************************************************
156 
157 
158 //*************************************************************************************************
168 template< typename T > // Type of the integral value
169 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,2UL> >
170  , If_< IsSigned<T>, SIMDcint16, SIMDcuint16 > >
171  loadu( const complex<T>* address ) noexcept
172 {
173  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
174 
175 #if BLAZE_AVX512BW_MODE
176  return _mm512_loadu_si512( reinterpret_cast<const __m512i*>( address ) );
177 #elif BLAZE_AVX2_MODE
178  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
179 #elif BLAZE_SSE2_MODE
180  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
181 #else
182  return *address;
183 #endif
184 }
185 //*************************************************************************************************
186 
187 
188 
189 
190 //=================================================================================================
191 //
192 // 32-BIT INTEGRAL SIMD TYPES
193 //
194 //=================================================================================================
195 
196 //*************************************************************************************************
206 template< typename T > // Type of the integral value
207 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,4UL> >
208  , If_< IsSigned<T>, SIMDint32, SIMDuint32 > >
209  loadu( const T* address ) noexcept
210 {
211 #if BLAZE_AVX512F_MODE
212  return _mm512_maskz_loadu_epi32( 0xFFFF, reinterpret_cast<const __m512i*>( address ) );
213 #elif BLAZE_MIC_MODE
214  __m512i v1 = _mm512_setzero_epi32();
215  v1 = _mm512_loadunpacklo_epi32( v1, address );
216  v1 = _mm512_loadunpackhi_epi32( v1, address+16UL );
217  return v1;
218 #elif BLAZE_AVX2_MODE
219  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
220 #elif BLAZE_SSE2_MODE
221  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
222 #else
223  return *address;
224 #endif
225 }
226 //*************************************************************************************************
227 
228 
229 //*************************************************************************************************
239 template< typename T > // Type of the integral value
240 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,4UL> >
241  , If_< IsSigned<T>, SIMDcint32, SIMDcuint32 > >
242  loadu( const complex<T>* address ) noexcept
243 {
244  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
245 
246 #if BLAZE_AVX512F_MODE
247  return _mm512_maskz_loadu_epi32( 0xFFFF, address );
248 #elif BLAZE_MIC_MODE
249  __m512i v1 = _mm512_setzero_epi32();
250  v1 = _mm512_loadunpacklo_epi32( v1, address );
251  v1 = _mm512_loadunpackhi_epi32( v1, address+8UL );
252  return v1;
253 #elif BLAZE_AVX2_MODE
254  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
255 #elif BLAZE_SSE2_MODE
256  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
257 #else
258  return *address;
259 #endif
260 }
261 //*************************************************************************************************
262 
263 
264 
265 
266 //=================================================================================================
267 //
268 // 64-BIT INTEGRAL SIMD TYPES
269 //
270 //=================================================================================================
271 
272 //*************************************************************************************************
282 template< typename T > // Type of the integral value
283 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,8UL> >
284  , If_< IsSigned<T>, SIMDint64, SIMDuint64 > >
285  loadu( const T* address ) noexcept
286 {
287 #if BLAZE_AVX512F_MODE
288  return _mm512_maskz_loadu_epi64( 0xFF, address );
289 #elif BLAZE_MIC_MODE
290  __m512i v1 = _mm512_setzero_epi32();
291  v1 = _mm512_loadunpacklo_epi64( v1, address );
292  v1 = _mm512_loadunpackhi_epi64( v1, address+8UL );
293  return v1;
294 #elif BLAZE_AVX2_MODE
295  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
296 #elif BLAZE_SSE2_MODE
297  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
298 #else
299  return *address;
300 #endif
301 }
302 //*************************************************************************************************
303 
304 
305 //*************************************************************************************************
315 template< typename T > // Type of the integral value
316 BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral<T>, HasSize<T,8UL> >
317  , If_< IsSigned<T>, SIMDcint64, SIMDcuint64 > >
318  loadu( const complex<T>* address ) noexcept
319 {
320  BLAZE_STATIC_ASSERT( sizeof( complex<T> ) == 2UL*sizeof( T ) );
321 
322 #if BLAZE_AVX512F_MODE
323  return _mm512_maskz_loadu_epi64( 0xFF, address );
324 #elif BLAZE_MIC_MODE
325  __m512i v1 = _mm512_setzero_epi32();
326  v1 = _mm512_loadunpacklo_epi64( v1, address );
327  v1 = _mm512_loadunpackhi_epi64( v1, address+4UL );
328  return v1;
329 #elif BLAZE_AVX2_MODE
330  return _mm256_loadu_si256( reinterpret_cast<const __m256i*>( address ) );
331 #elif BLAZE_SSE2_MODE
332  return _mm_loadu_si128( reinterpret_cast<const __m128i*>( address ) );
333 #else
334  return If_< IsSigned<T>, SIMDcint64, SIMDcuint64 >( *address );
335 #endif
336 }
337 //*************************************************************************************************
338 
339 
340 
341 
342 //=================================================================================================
343 //
344 // 32-BIT FLOATING POINT SIMD TYPES
345 //
346 //=================================================================================================
347 
348 //*************************************************************************************************
358 BLAZE_ALWAYS_INLINE const SIMDfloat loadu( const float* address ) noexcept
359 {
360 #if BLAZE_AVX512F_MODE
361  return _mm512_loadu_ps( address );
362 #elif BLAZE_MIC_MODE
363  __m512 v1 = _mm512_setzero_ps();
364  v1 = _mm512_loadunpacklo_ps( v1, address );
365  v1 = _mm512_loadunpackhi_ps( v1, address+16UL );
366  return v1;
367 #elif BLAZE_AVX_MODE
368  return _mm256_loadu_ps( address );
369 #elif BLAZE_SSE_MODE
370  return _mm_loadu_ps( address );
371 #else
372  return *address;
373 #endif
374 }
375 //*************************************************************************************************
376 
377 
378 //*************************************************************************************************
388 BLAZE_ALWAYS_INLINE const SIMDcfloat loadu( const complex<float>* address ) noexcept
389 {
390  BLAZE_STATIC_ASSERT( sizeof( complex<float> ) == 2UL*sizeof( float ) );
391 
392 #if BLAZE_AVX512F_MODE
393  return _mm512_loadu_ps( address );
394 #elif BLAZE_MIC_MODE
395  __m512 v1 = _mm512_setzero_ps();
396  v1 = _mm512_loadunpacklo_ps( v1, reinterpret_cast<const float*>( address ) );
397  v1 = _mm512_loadunpackhi_ps( v1, reinterpret_cast<const float*>( address+8UL ) );
398  return v1;
399 #elif BLAZE_AVX_MODE
400  return _mm256_loadu_ps( reinterpret_cast<const float*>( address ) );
401 #elif BLAZE_SSE_MODE
402  return _mm_loadu_ps( reinterpret_cast<const float*>( address ) );
403 #else
404  return *address;
405 #endif
406 }
407 //*************************************************************************************************
408 
409 
410 
411 
412 //=================================================================================================
413 //
414 // 64-BIT FLOATING POINT SIMD TYPES
415 //
416 //=================================================================================================
417 
418 //*************************************************************************************************
428 BLAZE_ALWAYS_INLINE const SIMDdouble loadu( const double* address ) noexcept
429 {
430 #if BLAZE_AVX512F_MODE
431  return _mm512_loadu_pd( address );
432 #elif BLAZE_MIC_MODE
433  __m512d v1 = _mm512_setzero_pd();
434  v1 = _mm512_loadunpacklo_pd( v1, address );
435  v1 = _mm512_loadunpackhi_pd( v1, address+8UL );
436  return v1;
437 #elif BLAZE_AVX_MODE
438  return _mm256_loadu_pd( address );
439 #elif BLAZE_SSE2_MODE
440  return _mm_loadu_pd( address );
441 #else
442  return *address;
443 #endif
444 }
445 //*************************************************************************************************
446 
447 
448 //*************************************************************************************************
458 BLAZE_ALWAYS_INLINE const SIMDcdouble loadu( const complex<double>* address ) noexcept
459 {
460  BLAZE_STATIC_ASSERT( sizeof( complex<double> ) == 2UL*sizeof( double ) );
461 
462 #if BLAZE_AVX512F_MODE
463  return _mm512_loadu_pd( address );
464 #elif BLAZE_MIC_MODE
465  __m512d v1 = _mm512_setzero_pd();
466  v1 = _mm512_loadunpacklo_pd( v1, reinterpret_cast<const double*>( address ) );
467  v1 = _mm512_loadunpackhi_pd( v1, reinterpret_cast<const double*>( address+4UL ) );
468  return v1;
469 #elif BLAZE_AVX_MODE
470  return _mm256_loadu_pd( reinterpret_cast<const double*>( address ) );
471 #elif BLAZE_SSE2_MODE
472  return _mm_loadu_pd( reinterpret_cast<const double*>( address ) );
473 #else
474  return *address;
475 #endif
476 }
477 //*************************************************************************************************
478 
479 } // namespace blaze
480 
481 #endif
SIMD type for 16-bit signed integral data values.
SIMD type for 32-bit unsigned integral complex values.
SIMD type for 16-bit unsigned integral complex values.
SIMD type for 32-bit signed integral complex values.
Header file for the IsIntegral type trait.
Header file for the And class template.
SIMD type for 32-bit unsigned integral data values.
SIMD type for 64-bit double precision floating point data values.
BLAZE_ALWAYS_INLINE const EnableIf_< And< IsIntegral< T >, HasSize< T, 1UL > >, If_< IsSigned< T >, SIMDint8, SIMDuint8 > > loadu(const T *address) noexcept
Loads a vector of 1-byte integral values.
Definition: Loadu.h:77
SIMD type for 64-bit unsigned integral complex values.
SIMD type for 16-bit signed integral complex values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Header file for the If class template.
Compile time assertion.
SIMD type for 16-bit unsigned integral data values.
SIMD type for 32-bit signed integral data values.
Header file for the EnableIf class template.
Header file for the basic SIMD types.
SIMD type for 8-bit signed integral complex values.
Header file for the HasSize type trait.
SIMD type for 32-bit single precision complex values.
SIMD type for 64-bit signed integral complex values.
Header file for the IsSigned type trait.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 64-bit unsigned integral data values.
SIMD type for 64-bit integral data values.
SIMD type for 8-bit unsigned integral complex values.
SIMD type for 64-bit double precision complex values.
System settings for the SSE mode.
Header file for the complex data type.
#define BLAZE_STATIC_ASSERT(expr)
Compile time assertion macro. In case of an invalid compile time expression, a compilation error is created.
Definition: StaticAssert.h:112
System settings for the inline keywords.