35#ifndef _BLAZE_MATH_SIMD_SIGN_H_
36#define _BLAZE_MATH_SIMD_SIGN_H_
// Sign computation for packed 8-bit signed integers: every lane of the result
// is 1, 0 or -1 depending on whether the matching lane of a.value is positive,
// zero or negative. The enclosing function signature is not part of this excerpt.
//
// AVX-512BW path: there is no 512-bit "sign" intrinsic used here, so the result
// is assembled from two compare masks and two masked blends.
70#if BLAZE_AVX512BW_MODE
72 const __m512i
zero ( _mm512_setzero_si512() );
// mask1: one bit per lane, set where 0 < a.value (strictly positive lanes).
73 const __mmask64 mask1( _mm512_cmplt_epi8_mask(
zero, a.value ) );
// mask2: one bit per lane, set where a.value < 0 (strictly negative lanes).
74 const __mmask64 mask2( _mm512_cmplt_epi8_mask( a.value,
zero ) );
// xmm1: 1 in the positive lanes, 0 in all other lanes (mask_blend takes the
// second vector operand where the mask bit is set, the first otherwise).
75 const __m512i xmm1 ( _mm512_mask_blend_epi8( mask1,
zero, _mm512_set1_epi8( 1 ) ) );
// Overwrite the negative lanes with -1; all other lanes keep the value from xmm1.
76 return _mm512_mask_blend_epi8( mask2, xmm1, _mm512_set1_epi8( -1 ) );
// 256-bit path: _mm256_sign_epi8 negates, zeroes or keeps each lane of its first
// operand (all ones here) according to the sign of the matching lane of a.value.
// NOTE(review): the preprocessor branch guarding this line is not visible in this
// excerpt; _mm256_sign_epi8 is an AVX2 intrinsic - confirm the guard matches.
80 return _mm256_sign_epi8( _mm256_set1_epi8( 1 ), a.value );
// 128-bit path: same technique with the 128-bit sign intrinsic.
// NOTE(review): guard not visible here; _mm_sign_epi8 is an SSSE3 intrinsic.
84 return _mm_sign_epi8( _mm_set1_epi8( 1 ), a.value );
// Sign computation for packed 16-bit signed integers: every lane of the result
// is 1, 0 or -1 according to the sign of the matching lane of a.value.
// The enclosing function signature is not part of this excerpt.
//
// AVX-512BW path: two compare masks select the positive and the negative lanes,
// two masked blends then write 1 and -1 into them.
113#if BLAZE_AVX512BW_MODE
115 const __m512i
zero ( _mm512_setzero_si512() );
// mask1: set where 0 < a.value (strictly positive lanes).
116 const __mmask32 mask1( _mm512_cmplt_epi16_mask(
zero, a.value ) );
// mask2: set where a.value < 0 (strictly negative lanes).
117 const __mmask32 mask2( _mm512_cmplt_epi16_mask( a.value,
zero ) );
// xmm1: 1 in the positive lanes, 0 elsewhere.
118 const __m512i xmm1 ( _mm512_mask_blend_epi16( mask1,
zero, _mm512_set1_epi16( 1 ) ) );
// Negative lanes become -1; the remaining lanes keep their xmm1 value.
119 return _mm512_mask_blend_epi16( mask2, xmm1, _mm512_set1_epi16( -1 ) );
// 256-bit path: apply the sign of each a.value lane to a vector of ones
// (negative -> -1, zero -> 0, positive -> 1).
// NOTE(review): the guard for this line is not visible in this excerpt;
// _mm256_sign_epi16 is an AVX2 intrinsic - confirm the guard matches.
123 return _mm256_sign_epi16( _mm256_set1_epi16( 1 ), a.value );
// SSSE3 path: same technique with the 128-bit sign intrinsic.
125#elif BLAZE_SSSE3_MODE
127 return _mm_sign_epi16( _mm_set1_epi16( 1 ), a.value );
// Sign computation for packed 32-bit signed integers: every lane of the result
// is 1, 0 or -1 according to the sign of the matching lane of a.value.
// The enclosing function signature is not part of this excerpt.
//
// AVX-512F path: compare against zero in both directions, then blend in the
// constants 1 and -1 under the resulting masks.
156#if BLAZE_AVX512F_MODE
158 const __m512i
zero ( _mm512_setzero_si512() );
// mask1: set where 0 < a.value (strictly positive lanes).
159 const __mmask16 mask1( _mm512_cmplt_epi32_mask(
zero, a.value ) );
// mask2: set where a.value < 0 (strictly negative lanes).
160 const __mmask16 mask2( _mm512_cmplt_epi32_mask( a.value,
zero ) );
// xmm1: 1 in the positive lanes, 0 elsewhere.
161 const __m512i xmm1 ( _mm512_mask_blend_epi32( mask1,
zero, _mm512_set1_epi32( 1 ) ) );
// Negative lanes become -1; the remaining lanes keep their xmm1 value.
162 return _mm512_mask_blend_epi32( mask2, xmm1, _mm512_set1_epi32( -1 ) );
// 256-bit path: apply the sign of each a.value lane to a vector of ones
// (negative -> -1, zero -> 0, positive -> 1).
// NOTE(review): the guard for this line is not visible in this excerpt;
// _mm256_sign_epi32 is an AVX2 intrinsic - confirm the guard matches.
166 return _mm256_sign_epi32( _mm256_set1_epi32( 1 ), a.value );
// SSSE3 path: same technique with the 128-bit sign intrinsic.
168#elif BLAZE_SSSE3_MODE
170 return _mm_sign_epi32( _mm_set1_epi32( 1 ), a.value );
// Sign computation for packed 64-bit signed integers: every lane of the result
// is 1, 0 or -1 according to the sign of the matching lane of a.value.
// Only the AVX-512F branch is visible in this excerpt; the enclosing function
// signature and any alternative branches are not shown.
199#if BLAZE_AVX512F_MODE
201 const __m512i
zero ( _mm512_setzero_si512() );
// mask1: set where 0 < a.value (strictly positive lanes).
202 const __mmask8 mask1( _mm512_cmplt_epi64_mask(
zero, a.value ) );
// mask2: set where a.value < 0 (strictly negative lanes).
203 const __mmask8 mask2( _mm512_cmplt_epi64_mask( a.value,
zero ) );
// xmm1: 1 in the positive lanes, 0 elsewhere.
204 const __m512i xmm1 ( _mm512_mask_blend_epi64( mask1,
zero, _mm512_set1_epi64( 1L ) ) );
// Negative lanes become -1; the remaining lanes keep their xmm1 value.
205 return _mm512_mask_blend_epi64( mask2, xmm1, _mm512_set1_epi64( -1L ) );
// Sign computation for packed single-precision floats: lanes greater than zero
// become 1.0F, lanes less than zero become -1.0F. Unlike the integer variants,
// the blends fall back to a.value itself rather than to a zero vector, so any
// lane that is neither greater nor less than zero is returned unchanged.
// The enclosing function signature is not part of this excerpt.
//
// First branch: AVX-512F/MIC combined with the GNU compiler. Its body is not
// visible in this excerpt.
234#if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER
// AVX-512F/MIC path for the remaining compilers: compare masks plus masked blends.
236#elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
238 const __m512
zero ( _mm512_setzero_ps() );
// mask1: set where 0 < a.value (positive lanes).
239 const __mmask16 mask1( _mm512_cmplt_ps_mask(
zero, a.value ) );
// mask2: set where a.value < 0 (negative lanes).
240 const __mmask16 mask2( _mm512_cmplt_ps_mask( a.value,
zero ) );
// xmm1: 1.0F in the positive lanes, the original a.value lane everywhere else.
241 const __m512 xmm1 ( _mm512_mask_blend_ps( mask1, a.value, _mm512_set1_ps( 1.0F ) ) );
// Negative lanes become -1.0F; the remaining lanes keep their xmm1 value.
242 return _mm512_mask_blend_ps( mask2, xmm1, _mm512_set1_ps( -1.0F ) );
// 256-bit path (guard not visible in this excerpt). The comparison results are
// full-width vector masks here, and _CMP_LT_OQ is the ordered, non-signaling
// less-than predicate, so a NaN lane satisfies neither comparison.
246 const __m256
zero ( _mm256_setzero_ps() );
247 const __m256 mask1( _mm256_cmp_ps(
zero, a.value, _CMP_LT_OQ ) );
248 const __m256 mask2( _mm256_cmp_ps( a.value,
zero, _CMP_LT_OQ ) );
// Note the operand order: blendv takes the mask LAST and selects the second
// operand in the lanes where the mask is set.
249 const __m256 xmm1 ( _mm256_blendv_ps( a.value, _mm256_set1_ps( 1.0F ), mask1 ) );
250 return _mm256_blendv_ps( xmm1, _mm256_set1_ps( -1.0F ), mask2 );
// 128-bit path: same scheme with the SSE compare and blend intrinsics.
// NOTE(review): the guard for this branch is not visible in this excerpt;
// _mm_blendv_ps is an SSE4.1 intrinsic - confirm the guard requires it.
254 const __m128
zero ( _mm_setzero_ps() );
255 const __m128 mask1( _mm_cmplt_ps(
zero, a.value ) );
256 const __m128 mask2( _mm_cmplt_ps( a.value,
zero ) );
257 const __m128 xmm1 ( _mm_blendv_ps( a.value, _mm_set1_ps( 1.0F ), mask1 ) );
258 return _mm_blendv_ps( xmm1, _mm_set1_ps( -1.0F ), mask2 );
// Sign computation for packed double-precision floats: lanes greater than zero
// become 1.0, lanes less than zero become -1.0. The blends fall back to a.value
// itself rather than to a zero vector, so any lane that is neither greater nor
// less than zero is returned unchanged.
// The enclosing function signature is not part of this excerpt.
//
// First branch: AVX-512F/MIC combined with the GNU compiler. Its body is not
// visible in this excerpt.
287#if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER
// AVX-512F/MIC path for the remaining compilers: compare masks plus masked blends.
289#elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
291 const __m512d
zero ( _mm512_setzero_pd() );
// mask1: set where 0 < a.value (positive lanes).
292 const __mmask8 mask1( _mm512_cmplt_pd_mask(
zero, a.value ) );
// mask2: set where a.value < 0 (negative lanes).
293 const __mmask8 mask2( _mm512_cmplt_pd_mask( a.value,
zero ) );
// xmm1: 1.0 in the positive lanes, the original a.value lane everywhere else.
294 const __m512d xmm1 ( _mm512_mask_blend_pd( mask1, a.value, _mm512_set1_pd( 1.0 ) ) );
// Negative lanes become -1.0; the remaining lanes keep their xmm1 value.
295 return _mm512_mask_blend_pd( mask2, xmm1, _mm512_set1_pd( -1.0 ) );
// 256-bit path (guard not visible in this excerpt). The comparison results are
// full-width vector masks here, and _CMP_LT_OQ is the ordered, non-signaling
// less-than predicate, so a NaN lane satisfies neither comparison.
299 const __m256d
zero ( _mm256_setzero_pd() );
300 const __m256d mask1( _mm256_cmp_pd(
zero, a.value, _CMP_LT_OQ ) );
301 const __m256d mask2( _mm256_cmp_pd( a.value,
zero, _CMP_LT_OQ ) );
// Note the operand order: blendv takes the mask LAST and selects the second
// operand in the lanes where the mask is set.
302 const __m256d xmm1 ( _mm256_blendv_pd( a.value, _mm256_set1_pd( 1.0 ), mask1 ) );
303 return _mm256_blendv_pd( xmm1, _mm256_set1_pd( -1.0 ), mask2 );
// 128-bit path: same scheme with the SSE2 compare and the 128-bit blend intrinsic.
// NOTE(review): the guard for this branch is not visible in this excerpt;
// _mm_blendv_pd is an SSE4.1 intrinsic - confirm the guard requires it.
307 const __m128d
zero ( _mm_setzero_pd() );
308 const __m128d mask1( _mm_cmplt_pd(
zero, a.value ) );
309 const __m128d mask2( _mm_cmplt_pd( a.value,
zero ) );
310 const __m128d xmm1 ( _mm_blendv_pd( a.value, _mm_set1_pd( 1.0 ), mask1 ) );
311 return _mm_blendv_pd( xmm1, _mm_set1_pd( -1.0 ), mask2 );
Header file for the basic SIMD types.
Compiler-specific system settings.
SIMD type for 64-bit double precision floating point data values.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 16-bit signed integral data values.
SIMD type for 32-bit signed integral data values.
SIMD type for 64-bit integral data values.
SIMD type for 8-bit signed integral data values.
BLAZE_ALWAYS_INLINE SIMDdouble sign(const SIMDdouble &a) noexcept=delete
Sign function for a vector of double precision floating point values.
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
constexpr decltype(auto) zero(size_t m, size_t n) noexcept
Creating a zero matrix.
Definition: ZeroMatrix.h:1356
System settings for the inline keywords.
System settings for the SSE mode.