35 #ifndef _BLAZE_MATH_SIMD_EQUAL_H_ 36 #define _BLAZE_MATH_SIMD_EQUAL_H_ 73 #if BLAZE_AVX512BW_MODE 75 return _mm512_cmpeq_epi8_mask( (~a).value, (~b).value ) == 0xffffffffffffffff;
79 return _mm256_movemask_epi8( _mm256_cmpeq_epi8( (~a).value, (~b).value ) ) == int(0xffffffff);
83 return _mm_movemask_epi8( _mm_cmpeq_epi8( (~a).value, (~b).value ) ) == int(0xffff);
106 #if BLAZE_AVX512BW_MODE 108 return _mm512_cmpeq_epi8_mask( (~a).value, (~b).value ) == 0xffffffffffffffff;
110 #elif BLAZE_AVX2_MODE 112 return _mm256_movemask_epi8( _mm256_cmpeq_epi8( (~a).value, (~b).value ) ) == int(0xffffffff);
114 #elif BLAZE_SSE2_MODE 116 return _mm_movemask_epi8( _mm_cmpeq_epi8( (~a).value, (~b).value ) ) == int(0xffff);
134 template<
typename T >
137 return equal<strict>( ~a, ~b );
152 template<
typename T >
155 return equal<strict>( ~a, ~b );
182 #if BLAZE_AVX512BW_MODE 184 return _mm512_cmpeq_epi16_mask( (~a).value, (~b).value ) == 0xffffffff;
186 #elif BLAZE_AVX2_MODE 188 return _mm256_movemask_epi8( _mm256_cmpeq_epi16( (~a).value, (~b).value ) ) == int(0xffffffff);
190 #elif BLAZE_SSE2_MODE 192 return _mm_movemask_epi8( _mm_cmpeq_epi16( (~a).value, (~b).value ) ) == int(0xffff);
215 #if BLAZE_AVX512BW_MODE 217 return _mm512_cmpeq_epi16_mask( (~a).value, (~b).value ) == 0xffffffff;
219 #elif BLAZE_AVX2_MODE 221 return _mm256_movemask_epi8( _mm256_cmpeq_epi16( (~a).value, (~b).value ) ) == int(0xffffffff);
223 #elif BLAZE_SSE2_MODE 225 return _mm_movemask_epi8( _mm_cmpeq_epi16( (~a).value, (~b).value ) ) == int(0xffff);
244 template<
typename T >
247 return equal<strict>( ~a, ~b );
262 template<
typename T >
265 return equal<strict>( ~a, ~b );
292 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 294 return _mm512_cmpeq_epi32_mask( (~a).value, (~b).value ) == 0xffff;
296 #elif BLAZE_AVX2_MODE 298 return _mm256_movemask_epi8( _mm256_cmpeq_epi32( (~a).value, (~b).value ) ) == int(0xffffffff);
300 #elif BLAZE_SSE2_MODE 302 return _mm_movemask_epi8( _mm_cmpeq_epi32( (~a).value, (~b).value ) ) == int(0xffff);
325 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 327 return _mm512_cmpeq_epi32_mask( (~a).value, (~b).value ) == 0xffff;
329 #elif BLAZE_AVX2_MODE 331 return _mm256_movemask_epi8( _mm256_cmpeq_epi32( (~a).value, (~b).value ) ) == int(0xffffffff);
333 #elif BLAZE_SSE2_MODE 335 return _mm_movemask_epi8( _mm_cmpeq_epi32( (~a).value, (~b).value ) ) == int(0xffff);
354 template<
typename T >
357 return equal<strict>( ~a, ~b );
372 template<
typename T >
375 return equal<strict>( ~a, ~b );
403 #if BLAZE_AVX512F_MODE 405 return _mm512_cmpeq_epi64_mask( (~a).value, (~b).value ) == 0xff;
407 #elif BLAZE_AVX2_MODE 409 return _mm256_movemask_epi8( _mm256_cmpeq_epi64( (~a).value, (~b).value ) ) == int(0xffffffff);
411 #elif BLAZE_SSE4_MODE 413 return _mm_movemask_epi8( _mm_cmpeq_epi64( (~a).value, (~b).value ) ) == int(0xffff);
436 #if BLAZE_AVX512F_MODE 438 return _mm512_cmpeq_epi64_mask( (~a).value, (~b).value ) == 0xff;
440 #elif BLAZE_AVX2_MODE 442 return _mm256_movemask_epi8( _mm256_cmpeq_epi64( (~a).value, (~b).value ) ) == int(0xffffffff);
444 #elif BLAZE_SSE4_MODE 446 return _mm_movemask_epi8( _mm_cmpeq_epi64( (~a).value, (~b).value ) ) == int(0xffff);
465 template<
typename T >
468 return equal<strict>( ~a, ~b );
483 template<
typename T >
486 return equal<strict>( ~a, ~b );
517 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 519 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 522 const __m512 accu( _mm512_set1_ps( static_cast<float>(
accuracy ) ) );
524 const __m512 xmm1( _mm512_abs_ps( _mm512_sub_ps( a.value, b.value ) ) );
525 const __m512 xmm2( _mm512_max_ps( accu, _mm512_mul_ps( accu, _mm512_abs_ps( a.value ) ) ) );
526 return _mm512_cmple_ps_mask( xmm1, xmm2 ) == 0xffff;
529 return _mm512_cmpeq_ps_mask( a.value, b.value ) == 0xffff;
535 const __m256 accu( _mm256_set1_ps( static_cast<float>(
accuracy ) ) );
536 const __m256 mask( _mm256_castsi256_ps( _mm256_set1_epi32( 0x80000000 ) ) );
538 const __m256 xmm1( _mm256_andnot_ps( mask, _mm256_sub_ps( a.value, b.value ) ) );
539 const __m256 xmm2( _mm256_max_ps( accu, _mm256_mul_ps( accu, _mm256_andnot_ps( mask, a.value ) ) ) );
540 return _mm256_movemask_ps( _mm256_cmp_ps( xmm1, xmm2, _CMP_LE_OQ ) ) == 0xff;
543 return _mm256_movemask_ps( _mm256_cmp_ps( a.value, b.value, _CMP_EQ_OQ ) ) == 0xff;
549 const __m128 accu( _mm_set1_ps( static_cast<float>(
accuracy ) ) );
550 const __m128 mask( _mm_castsi128_ps( _mm_set1_epi32( 0x80000000 ) ) );
552 const __m128 xmm1( _mm_andnot_ps( mask, _mm_sub_ps( a.value, b.value ) ) );
553 const __m128 xmm2( _mm_max_ps( accu, _mm_mul_ps( accu, _mm_andnot_ps( mask, a.value ) ) ) );
554 return _mm_movemask_ps( _mm_cmple_ps( xmm1, xmm2 ) ) == 0xf;
557 return _mm_movemask_ps( _mm_cmpeq_ps( a.value, b.value ) ) == 0xf;
585 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 587 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 590 const __m512 accu( _mm512_set1_ps( static_cast<float>(
accuracy ) ) );
592 const __m512 xmm1( _mm512_abs_ps( _mm512_sub_ps( a.value, b.value ) ) );
593 const __m512 xmm2( _mm512_max_ps( accu, _mm512_mul_ps( accu, _mm512_abs_ps( a.value ) ) ) );
594 return _mm512_cmple_ps_mask( xmm1, xmm2 ) == 0xffff;
597 return _mm512_cmpeq_ps_mask( a.value, b.value ) == 0xffff;
603 const __m256 accu( _mm256_set1_ps( static_cast<float>(
accuracy ) ) );
604 const __m256 mask( _mm256_castsi256_ps( _mm256_set1_epi32( 0x80000000 ) ) );
606 const __m256 xmm1( _mm256_andnot_ps( mask, _mm256_sub_ps( a.value, b.value ) ) );
607 const __m256 xmm2( _mm256_max_ps( accu, _mm256_mul_ps( accu, _mm256_andnot_ps( mask, a.value ) ) ) );
608 return _mm256_movemask_ps( _mm256_cmp_ps( xmm1, xmm2, _CMP_LE_OQ ) ) == 0xff;
611 return _mm256_movemask_ps( _mm256_cmp_ps( a.value, b.value, _CMP_EQ_OQ ) ) == 0xff;
617 const __m128 accu( _mm_set1_ps( static_cast<float>(
accuracy ) ) );
618 const __m128 mask( _mm_castsi128_ps( _mm_set1_epi32( 0x80000000 ) ) );
620 const __m128 xmm1( _mm_andnot_ps( mask, _mm_sub_ps( a.value, b.value ) ) );
621 const __m128 xmm2( _mm_max_ps( accu, _mm_mul_ps( accu, _mm_andnot_ps( mask, a.value ) ) ) );
622 return _mm_movemask_ps( _mm_cmple_ps( xmm1, xmm2 ) ) == 0xf;
625 return _mm_movemask_ps( _mm_cmpeq_ps( a.value, b.value ) ) == 0xf;
645 template<
typename T1
648 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 650 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 652 return _mm512_cmpeq_ps_mask( (~a).
eval().value, (~b).
eval().value ) == 0xffff;
656 return _mm256_movemask_ps( _mm256_cmp_ps( (~a).
eval().value, (~b).
eval().value, _CMP_EQ_OQ ) ) == 0xff;
660 return _mm_movemask_ps( _mm_cmpeq_ps( (~a).
eval().value, (~b).
eval().value ) ) == 0xf;
679 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 681 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 683 return _mm512_cmpeq_ps_mask( a.value, b.value ) == 0xffff;
687 return _mm256_movemask_ps( _mm256_cmp_ps( a.value, b.value, _CMP_EQ_OQ ) ) == 0xff;
691 return _mm_movemask_ps( _mm_cmpeq_ps( a.value, b.value ) ) == 0xf;
725 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 727 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 730 const __m512 accu( _mm512_set1_pd( static_cast<double>(
accuracy ) ) );
732 const __m512 xmm1( _mm512_abs_pd( _mm512_sub_pd( a.value, b.value ) ) );
733 const __m512 xmm2( _mm512_max_pd( accu, _mm512_mul_pd( accu, _mm512_abs_pd( a.value ) ) ) );
734 return _mm512_cmple_pd_mask( xmm1, xmm2 ) == 0xff;
737 return _mm512_cmpeq_pd_mask( a.value, b.value ) == 0xff;
743 const __m256d accu( _mm256_set1_pd( static_cast<double>(
accuracy ) ) );
744 const __m256d mask( _mm256_castsi256_pd(
745 _mm256_set_epi32( 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000, 0x0 ) ) );
747 const __m256d xmm1( _mm256_andnot_pd( mask, _mm256_sub_pd( a.value, b.value ) ) );
748 const __m256d xmm2( _mm256_max_pd( accu, _mm256_mul_pd( accu, _mm256_andnot_pd( mask, a.value ) ) ) );
749 return _mm256_movemask_pd( _mm256_cmp_pd( xmm1, xmm2, _CMP_LE_OQ ) ) == 0xf;
752 return _mm256_movemask_pd( _mm256_cmp_pd( a.value, b.value, _CMP_EQ_OQ ) ) == 0xf;
755 #elif BLAZE_SSE2_MODE 758 const __m128d accu( _mm_set1_pd( static_cast<double>(
accuracy ) ) );
759 const __m128d mask( _mm_castsi128_pd( _mm_set_epi32( 0x80000000, 0x0, 0x80000000, 0x0 ) ) );
761 const __m128d xmm1( _mm_andnot_pd( mask, _mm_sub_pd( a.value, b.value ) ) );
762 const __m128d xmm2( _mm_max_pd( accu, _mm_mul_pd( accu, _mm_andnot_pd( mask, a.value ) ) ) );
763 return _mm_movemask_pd( _mm_cmple_pd( xmm1, xmm2 ) ) == 0x3;
766 return _mm_movemask_pd( _mm_cmpeq_pd( a.value, b.value ) ) == 0x3;
794 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 796 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 799 const __m512 accu( _mm512_set1_pd( static_cast<double>(
accuracy ) ) );
801 const __m512 xmm1( _mm512_abs_pd( _mm512_sub_pd( a.value, b.value ) ) );
802 const __m512 xmm2( _mm512_max_pd( accu, _mm512_mul_pd( accu, _mm512_abs_pd( a.value ) ) ) );
803 return _mm512_cmple_pd_mask( xmm1, xmm2 ) == 0xff;
806 return _mm512_cmpeq_pd_mask( a.value, b.value ) == 0xff;
812 const __m256d accu( _mm256_set1_pd( static_cast<double>(
accuracy ) ) );
813 const __m256d mask( _mm256_castsi256_pd(
814 _mm256_set_epi32( 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000, 0x0, 0x80000000, 0x0 ) ) );
816 const __m256d xmm1( _mm256_andnot_pd( mask, _mm256_sub_pd( a.value, b.value ) ) );
817 const __m256d xmm2( _mm256_max_pd( accu, _mm256_mul_pd( accu, _mm256_andnot_pd( mask, a.value ) ) ) );
818 return _mm256_movemask_pd( _mm256_cmp_pd( xmm1, xmm2, _CMP_LE_OQ ) ) == 0xf;
821 return _mm256_movemask_pd( _mm256_cmp_pd( a.value, b.value, _CMP_EQ_OQ ) ) == 0xf;
824 #elif BLAZE_SSE2_MODE 827 const __m128d accu( _mm_set1_pd( static_cast<double>(
accuracy ) ) );
828 const __m128d mask( _mm_castsi128_pd( _mm_set_epi32( 0x80000000, 0x0, 0x80000000, 0x0 ) ) );
830 const __m128d xmm1( _mm_andnot_pd( mask, _mm_sub_pd( a.value, b.value ) ) );
831 const __m128d xmm2( _mm_max_pd( accu, _mm_mul_pd( accu, _mm_andnot_pd( mask, a.value ) ) ) );
832 return _mm_movemask_pd( _mm_cmple_pd( xmm1, xmm2 ) ) == 0x3;
835 return _mm_movemask_pd( _mm_cmpeq_pd( a.value, b.value ) ) == 0x3;
855 template<
typename T1
858 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 860 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 862 return _mm512_cmpeq_pd_mask( (~a).
eval().value, (~b).
eval().value ) == 0xff;
866 return _mm256_movemask_pd( _mm256_cmp_pd( (~a).
eval().value, (~b).
eval().value, _CMP_EQ_OQ ) ) == 0xf;
868 #elif BLAZE_SSE2_MODE 870 return _mm_movemask_pd( _mm_cmpeq_pd( (~a).
eval().value, (~b).
eval().value ) ) == 0x3;
889 #if ( BLAZE_AVX512F_MODE || BLAZE_MIC_MODE ) && BLAZE_GNU_COMPILER 891 #elif BLAZE_AVX512F_MODE || BLAZE_MIC_MODE 893 return _mm512_cmpeq_pd_mask( a.value, b.value ) == 0xff;
897 return _mm256_movemask_pd( _mm256_cmp_pd( a.value, b.value, _CMP_EQ_OQ ) ) == 0xf;
899 #elif BLAZE_SSE2_MODE 901 return _mm_movemask_pd( _mm_cmpeq_pd( a.value, b.value ) ) == 0x3;
925 template<
typename T >
928 return !( (~a) == (~b) );
Computation accuracy for floating point data types.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
constexpr bool operator==(const NegativeAccuracy< A > &lhs, const T &rhs)
Equality comparison between a NegativeAccuracy object and a floating point value.
Definition: Accuracy.h:253
Compiler-specific system settings.
Base class for all SIMD data types. The SIMDPack class template is a base class for all SIMD data type...
Definition: SIMDPack.h:63
decltype(auto) eval(const DenseMatrix< MT, SO > &dm)
Forces the evaluation of the given dense matrix expression dm.
Definition: DMatEvalExpr.h:786
constexpr bool operator!=(const NegativeAccuracy< A > &lhs, const T &rhs)
Inequality comparison between a NegativeAccuracy object and a floating point value.
Definition: Accuracy.h:293
Header file for the basic SIMD types.
Header file for the relaxation flag types.
constexpr bool relaxed
Relaxation flag for relaxed semantics.
Definition: RelaxationFlag.h:85
System settings for the SSE mode.
constexpr Accuracy accuracy
Global Accuracy instance. The blaze::accuracy instance can be used wherever a floating point data type...
Definition: Accuracy.h:907
System settings for the inline keywords.
bool equal(const SharedValue< T1 > &lhs, const SharedValue< T2 > &rhs)
Equality check for two shared values.
Definition: SharedValue.h:342