Blaze 3.9
Mult.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_SIMD_MULT_H_
36#define _BLAZE_MATH_SIMD_MULT_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
44#include <blaze/system/Inline.h>
46
47
48namespace blaze {
49
50//=================================================================================================
51//
52// 16-BIT INTEGRAL SIMD TYPES
53//
54//=================================================================================================
55
56//*************************************************************************************************
// Element-wise multiplication of two 16-bit integral SIMD vectors of the same type T.
// The raw registers of both operands are passed to the mullo_epi16 intrinsic of the first enabled
// mode in the chain AVX-512BW -> AVX2 -> SSE2; mullo keeps the low 16 bits of each product.
// If none of these modes is enabled, the operator is declared as deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
66template< typename T > // Type of both operands
68 operator*( const SIMDi16<T>& a, const SIMDi16<T>& b ) noexcept
69#if BLAZE_AVX512BW_MODE
70{
   // (*a) is applied to the SIMDi16<T> base reference before reading .value; presumably it
   // yields the derived SIMD object -- confirm against the SIMD base class.
71 return _mm512_mullo_epi16( (*a).value, (*b).value );
72}
73#elif BLAZE_AVX2_MODE
74{
75 return _mm256_mullo_epi16( (*a).value, (*b).value );
76}
77#elif BLAZE_SSE2_MODE
78{
79 return _mm_mullo_epi16( (*a).value, (*b).value );
80}
81#else
82= delete;
83#endif
84//*************************************************************************************************
85
86
87//*************************************************************************************************
// Element-wise multiplication of two 16-bit integral SIMD vectors of different types T1 and T2.
// The implementation is the same mullo_epi16 call as for identical operand types; only the
// (not shown) return type can differ. Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
97template< typename T1 // Type of the left-hand side operand
98 , typename T2 > // Type of the right-hand side operand
100 operator*( const SIMDi16<T1>& a, const SIMDi16<T2>& b ) noexcept
101#if BLAZE_AVX512BW_MODE
102{
103 return _mm512_mullo_epi16( (*a).value, (*b).value );
104}
105#elif BLAZE_AVX2_MODE
106{
107 return _mm256_mullo_epi16( (*a).value, (*b).value );
108}
109#elif BLAZE_SSE2_MODE
110{
111 return _mm_mullo_epi16( (*a).value, (*b).value );
112}
113#else
114= delete;
115#endif
116//*************************************************************************************************
117
118
119//*************************************************************************************************
// Multiplication of a 16-bit signed integral complex SIMD vector (left) with a 16-bit signed
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi16;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise the operator is deleted.
130 operator*( const SIMDcint16& a, const SIMDint16& b ) noexcept
131#if BLAZE_AVX512BW_MODE
132{
133 return _mm512_mullo_epi16( (*a).value, (*b).value );
134}
135#elif BLAZE_AVX2_MODE
136{
137 return _mm256_mullo_epi16( (*a).value, (*b).value );
138}
139#elif BLAZE_SSE2_MODE
140{
141 return _mm_mullo_epi16( (*a).value, (*b).value );
142}
143#else
144= delete;
145#endif
146//*************************************************************************************************
147
148
149//*************************************************************************************************
// Multiplication of a 16-bit unsigned integral complex SIMD vector (left) with a 16-bit unsigned
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi16;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise the operator is deleted.
160 operator*( const SIMDcuint16& a, const SIMDuint16& b ) noexcept
161#if BLAZE_AVX512BW_MODE
162{
163 return _mm512_mullo_epi16( (*a).value, (*b).value );
164}
165#elif BLAZE_AVX2_MODE
166{
167 return _mm256_mullo_epi16( (*a).value, (*b).value );
168}
169#elif BLAZE_SSE2_MODE
170{
171 return _mm_mullo_epi16( (*a).value, (*b).value );
172}
173#else
174= delete;
175#endif
176//*************************************************************************************************
177
178
179//*************************************************************************************************
// Multiplication of a 16-bit signed integral SIMD vector (left) with a 16-bit signed integral
// complex SIMD vector (right); mirror image of the (complex, integral) overload. Both raw
// registers are multiplied lane by lane with mullo_epi16 without any shuffling.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise the operator is deleted.
190 operator*( const SIMDint16& a, const SIMDcint16& b ) noexcept
191#if BLAZE_AVX512BW_MODE
192{
193 return _mm512_mullo_epi16( (*a).value, (*b).value );
194}
195#elif BLAZE_AVX2_MODE
196{
197 return _mm256_mullo_epi16( (*a).value, (*b).value );
198}
199#elif BLAZE_SSE2_MODE
200{
201 return _mm_mullo_epi16( (*a).value, (*b).value );
202}
203#else
204= delete;
205#endif
206//*************************************************************************************************
207
208
209//*************************************************************************************************
// Multiplication of a 16-bit unsigned integral SIMD vector (left) with a 16-bit unsigned integral
// complex SIMD vector (right); mirror image of the (complex, integral) overload. Both raw
// registers are multiplied lane by lane with mullo_epi16 without any shuffling.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise the operator is deleted.
220 operator*( const SIMDuint16& a, const SIMDcuint16& b ) noexcept
221#if BLAZE_AVX512BW_MODE
222{
223 return _mm512_mullo_epi16( (*a).value, (*b).value );
224}
225#elif BLAZE_AVX2_MODE
226{
227 return _mm256_mullo_epi16( (*a).value, (*b).value );
228}
229#elif BLAZE_SSE2_MODE
230{
231 return _mm_mullo_epi16( (*a).value, (*b).value );
232}
233#else
234= delete;
235#endif
236//*************************************************************************************************
237
238
239//*************************************************************************************************
// Complex multiplication of two 16-bit integral complex SIMD vectors of the same type T.
// Assuming interleaved (real, imaginary) 16-bit lanes (TODO confirm against the SIMD type
// definitions), every branch computes, per complex element,
//    real = a.re*b.re - a.im*b.im      imag = a.re*b.im + a.im*b.re
// in three steps:
//   1. broadcast a's even lanes (a_rr / first x) and multiply with b            -> (re*re, re*im)
//   2. broadcast a's odd lanes (a_ii / second x), swap b's lane pairs (b_ri / y)
//      and multiply                                                             -> (im*im, im*re)
//   3. negate the even lanes of the second product and add both products.
// Mode chain: AVX-512BW -> AVX2 -> SSE2, otherwise the operator is deleted.
249template< typename T > // Type of both operands
251 operator*( const SIMDci16<T>& a, const SIMDci16<T>& b ) noexcept
252#if BLAZE_AVX512BW_MODE
253{
   // Selector 0b11'11'01'01 picks words 1,1,3,3 of each 4-word group: odd lanes duplicated.
254 __m512i a_ii = _mm512_shufflelo_epi16( (*a).value, 0b11'11'01'01 );
255 a_ii = _mm512_shufflehi_epi16( a_ii, 0b11'11'01'01 );
256
   // Selector 0b10'11'00'01 picks words 1,0,3,2: adjacent lanes swapped.
257 __m512i b_ri = _mm512_shufflelo_epi16( (*b).value, 0b10'11'00'01 );
258 b_ri = _mm512_shufflehi_epi16( b_ri, 0b10'11'00'01 );
259
   // Selector 0b10'10'00'00 picks words 0,0,2,2: even lanes duplicated.
260 __m512i a_rr = _mm512_shufflelo_epi16( (*a).value, 0b10'10'00'00 );
261 a_rr = _mm512_shufflehi_epi16( a_rr, 0b10'10'00'00 );
262
263 const __m512i a_rr_b = _mm512_mullo_epi16( a_rr, (*b).value );
264 const __m512i a_ii_b_ri = _mm512_mullo_epi16( a_ii, b_ri );
   // Mask 0x55555555 selects the even lanes: there the value becomes 0 - a_ii_b_ri, all other
   // lanes keep a_ii_b_ri unchanged.
265 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi16( a_ii_b_ri, 0x55555555,
266 _mm512_setzero_si512(), a_ii_b_ri );
267 return _mm512_add_epi16( a_rr_b, a_ii_b_ri_signed );
268}
269#elif BLAZE_AVX2_MODE
270{
271 __m256i x, y, z;
   // _mm256_set_epi16 lists the highest lane first, so the even lanes receive -1.
272 const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
273
   // 0xA0 duplicates the even lanes, 0xF5 duplicates the odd lanes, 0xB1 swaps adjacent lanes.
274 x = _mm256_shufflelo_epi16( (*a).value, 0xA0 );
275 x = _mm256_shufflehi_epi16( x, 0xA0 );
276 z = _mm256_mullo_epi16( x, (*b).value );
277 x = _mm256_shufflelo_epi16( (*a).value, 0xF5 );
278 x = _mm256_shufflehi_epi16( x, 0xF5 );
279 y = _mm256_shufflelo_epi16( (*b).value, 0xB1 );
280 y = _mm256_shufflehi_epi16( y, 0xB1 );
281 y = _mm256_mullo_epi16( x, y );
   // Multiplying by (+1/-1) flips the sign of the even lanes of the second product.
282 y = _mm256_mullo_epi16( y, neg );
283 return _mm256_add_epi16( z, y );
284}
285#elif BLAZE_SSE2_MODE
286{
287 __m128i x, y, z;
   // _mm_set_epi16 lists the highest lane first, so the even lanes receive -1.
288 const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );
289
   // Same sequence as the AVX2 branch, on 128-bit registers.
290 x = _mm_shufflelo_epi16( (*a).value, 0xA0 );
291 x = _mm_shufflehi_epi16( x, 0xA0 );
292 z = _mm_mullo_epi16( x, (*b).value );
293 x = _mm_shufflelo_epi16( (*a).value, 0xF5 );
294 x = _mm_shufflehi_epi16( x, 0xF5 );
295 y = _mm_shufflelo_epi16( (*b).value, 0xB1 );
296 y = _mm_shufflehi_epi16( y, 0xB1 );
297 y = _mm_mullo_epi16( x, y );
298 y = _mm_mullo_epi16( y, neg );
299 return _mm_add_epi16( z, y );
300}
301#else
302= delete;
303#endif
304//*************************************************************************************************
305
306
307
308
309//=================================================================================================
310//
311// 32-BIT INTEGRAL SIMD TYPES
312//
313//=================================================================================================
314
315//*************************************************************************************************
// Element-wise multiplication of two 32-bit integral SIMD vectors of the same type T.
// The raw registers of both operands are passed to the mullo_epi32 intrinsic of the first enabled
// mode in the chain (AVX-512F or MIC) -> AVX2 -> SSE4; mullo keeps the low 32 bits of each
// product. If none of these modes is enabled, the operator is declared as deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
325template< typename T > // Type of both operands
327 operator*( const SIMDi32<T>& a, const SIMDi32<T>& b ) noexcept
328#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
329{
330 return _mm512_mullo_epi32( (*a).value, (*b).value );
331}
332#elif BLAZE_AVX2_MODE
333{
334 return _mm256_mullo_epi32( (*a).value, (*b).value );
335}
336#elif BLAZE_SSE4_MODE
337{
338 return _mm_mullo_epi32( (*a).value, (*b).value );
339}
340#else
341= delete;
342#endif
343//*************************************************************************************************
344
345
346//*************************************************************************************************
// Element-wise multiplication of two 32-bit integral SIMD vectors of different types T1 and T2.
// The implementation is the same mullo_epi32 call as for identical operand types; only the
// (not shown) return type can differ.
// Mode chain: (AVX-512F or MIC) -> AVX2 -> SSE4, otherwise the operator is deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
356template< typename T1 // Type of the left-hand side operand
357 , typename T2 > // Type of the right-hand side operand
359 operator*( const SIMDi32<T1>& a, const SIMDi32<T2>& b ) noexcept
360#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
361{
362 return _mm512_mullo_epi32( (*a).value, (*b).value );
363}
364#elif BLAZE_AVX2_MODE
365{
366 return _mm256_mullo_epi32( (*a).value, (*b).value );
367}
368#elif BLAZE_SSE4_MODE
369{
370 return _mm_mullo_epi32( (*a).value, (*b).value );
371}
372#else
373= delete;
374#endif
375//*************************************************************************************************
376
377
378//*************************************************************************************************
// Multiplication of a 32-bit signed integral complex SIMD vector (left) with a 32-bit signed
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi32;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX2 -> SSE4, otherwise the operator is deleted.
389 operator*( const SIMDcint32& a, const SIMDint32& b ) noexcept
390#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
391{
392 return _mm512_mullo_epi32( (*a).value, (*b).value );
393}
394#elif BLAZE_AVX2_MODE
395{
396 return _mm256_mullo_epi32( (*a).value, (*b).value );
397}
398#elif BLAZE_SSE4_MODE
399{
400 return _mm_mullo_epi32( (*a).value, (*b).value );
401}
402#else
403= delete;
404#endif
405//*************************************************************************************************
406
407
408//*************************************************************************************************
// Multiplication of a 32-bit unsigned integral complex SIMD vector (left) with a 32-bit unsigned
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi32;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX2 -> SSE4, otherwise the operator is deleted.
419 operator*( const SIMDcuint32& a, const SIMDuint32& b ) noexcept
420#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
421{
422 return _mm512_mullo_epi32( (*a).value, (*b).value );
423}
424#elif BLAZE_AVX2_MODE
425{
426 return _mm256_mullo_epi32( (*a).value, (*b).value );
427}
428#elif BLAZE_SSE4_MODE
429{
430 return _mm_mullo_epi32( (*a).value, (*b).value );
431}
432#else
433= delete;
434#endif
435//*************************************************************************************************
436
437
438//*************************************************************************************************
// Multiplication of a 32-bit signed integral SIMD vector (left) with a 32-bit signed integral
// complex SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi32;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
//
// This overload is intentionally a plain (non-template) function. Both parameters are concrete
// types, so a `template< typename T1, typename T2 >` header would introduce parameters that
// cannot be deduced from the call arguments, making the overload unreachable for `a * b`.
// Mode chain: (AVX-512F or MIC) -> AVX2 -> SSE4, otherwise the operator is deleted.
451 operator*( const SIMDint32& a, const SIMDcint32& b ) noexcept
452#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
453{
454 return _mm512_mullo_epi32( (*a).value, (*b).value );
455}
456#elif BLAZE_AVX2_MODE
457{
458 return _mm256_mullo_epi32( (*a).value, (*b).value );
459}
460#elif BLAZE_SSE4_MODE
461{
462 return _mm_mullo_epi32( (*a).value, (*b).value );
463}
464#else
465= delete;
466#endif
467//*************************************************************************************************
468
469
470//*************************************************************************************************
// Multiplication of a 32-bit unsigned integral SIMD vector (left) with a 32-bit unsigned integral
// complex SIMD vector (right). Both raw registers are multiplied lane by lane with mullo_epi32;
// no real/imaginary shuffling is performed.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
//
// This overload is intentionally a plain (non-template) function. Both parameters are concrete
// types, so a `template< typename T1, typename T2 >` header would introduce parameters that
// cannot be deduced from the call arguments, making the overload unreachable for `a * b`.
// Mode chain: (AVX-512F or MIC) -> AVX2 -> SSE4, otherwise the operator is deleted.
483 operator*( const SIMDuint32& a, const SIMDcuint32& b ) noexcept
484#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
485{
486 return _mm512_mullo_epi32( (*a).value, (*b).value );
487}
488#elif BLAZE_AVX2_MODE
489{
490 return _mm256_mullo_epi32( (*a).value, (*b).value );
491}
492#elif BLAZE_SSE4_MODE
493{
494 return _mm_mullo_epi32( (*a).value, (*b).value );
495}
496#else
497= delete;
498#endif
499//*************************************************************************************************
500
501
502//*************************************************************************************************
// Complex multiplication of two 32-bit integral complex SIMD vectors of the same type T.
// Assuming interleaved (real, imaginary) 32-bit lanes (TODO confirm against the SIMD type
// definitions), every branch computes, per complex element,
//    real = a.re*b.re - a.im*b.im      imag = a.re*b.im + a.im*b.re
// by multiplying a's duplicated even lanes with b, multiplying a's duplicated odd lanes with the
// pair-swapped b, negating the even lanes of the second product and adding both products.
// Mode chain: AVX-512F -> AVX2 -> SSE4, otherwise the operator is deleted. Unlike the other
// 32-bit integral overloads in this listing, the first branch does not include BLAZE_MIC_MODE.
512template< typename T > // Type of both operands
514 operator*( const SIMDci32<T>& a, const SIMDci32<T>& b ) noexcept
515#if BLAZE_AVX512F_MODE
516{
   // _MM_PERM_DDBB duplicates the odd lanes, _MM_PERM_CDAB swaps adjacent lanes and
   // _MM_PERM_CCAA duplicates the even lanes within each group of four 32-bit lanes.
517 const __m512i a_ii = _mm512_shuffle_epi32( (*a).value, _MM_PERM_DDBB );
518 const __m512i b_ri = _mm512_shuffle_epi32( (*b).value, _MM_PERM_CDAB );
519 const __m512i a_rr = _mm512_shuffle_epi32( (*a).value, _MM_PERM_CCAA );
520
521 const __m512i a_rr_b = _mm512_mullo_epi32( a_rr, (*b).value );
522 const __m512i a_ii_b_ri = _mm512_mullo_epi32( a_ii, b_ri );
   // Mask 0b0101010101010101 selects the even lanes: there the value becomes 0 - a_ii_b_ri, all
   // other lanes keep a_ii_b_ri unchanged.
523 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi32( a_ii_b_ri, 0b0101010101010101,
524 _mm512_setzero_si512(), a_ii_b_ri );
525 return _mm512_add_epi32( a_rr_b, a_ii_b_ri_signed );
526}
527#elif BLAZE_AVX2_MODE
528{
529 __m256i x, y, z;
   // _mm256_set_epi32 lists the highest lane first, so the even lanes receive -1.
530 const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) );
531
   // 0xA0 duplicates the even lanes, 0xF5 duplicates the odd lanes, 0xB1 swaps adjacent lanes.
532 x = _mm256_shuffle_epi32( (*a).value, 0xA0 );
533 z = _mm256_mullo_epi32( x, (*b).value );
534 x = _mm256_shuffle_epi32( (*a).value, 0xF5 );
535 y = _mm256_shuffle_epi32( (*b).value, 0xB1 );
536 y = _mm256_mullo_epi32( x, y );
   // Multiplying by (+1/-1) flips the sign of the even lanes of the second product.
537 y = _mm256_mullo_epi32( y, neg );
538 return _mm256_add_epi32( z, y );
539}
540#elif BLAZE_SSE4_MODE
541{
542 __m128i x, y, z;
   // _mm_set_epi32 lists the highest lane first, so the even lanes receive -1.
543 const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) );
544
   // Same sequence as the AVX2 branch, on 128-bit registers.
545 x = _mm_shuffle_epi32( (*a).value, 0xA0 );
546 z = _mm_mullo_epi32( x, (*b).value );
547 x = _mm_shuffle_epi32( (*a).value, 0xF5 );
548 y = _mm_shuffle_epi32( (*b).value, 0xB1 );
549 y = _mm_mullo_epi32( x, y );
550 y = _mm_mullo_epi32( y, neg );
551 return _mm_add_epi32( z, y );
552}
553#else
554= delete;
555#endif
556//*************************************************************************************************
557
558
559
560
561//=================================================================================================
562//
563// 64-BIT INTEGRAL SIMD TYPES
564//
565//=================================================================================================
566
567//*************************************************************************************************
// Element-wise multiplication of two 64-bit integral SIMD vectors of the same type T.
// Only an AVX-512DQ implementation exists (_mm512_mullo_epi64, which keeps the low 64 bits of
// each product); in every other mode the operator is declared as deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
577template< typename T > // Type of both operands
579 operator*( const SIMDi64<T>& a, const SIMDi64<T>& b ) noexcept
580#if BLAZE_AVX512DQ_MODE
581{
582 return _mm512_mullo_epi64( (*a).value, (*b).value );
583}
584#else
585= delete;
586#endif
587//*************************************************************************************************
588
589
590//*************************************************************************************************
// Element-wise multiplication of two 64-bit integral SIMD vectors of different types T1 and T2.
// Only an AVX-512DQ implementation exists (_mm512_mullo_epi64); in every other mode the operator
// is declared as deleted.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
600template< typename T1 // Type of the left-hand side operand
601 , typename T2 > // Type of the right-hand side operand
603 operator*( const SIMDi64<T1>& a, const SIMDi64<T2>& b ) noexcept
604#if BLAZE_AVX512DQ_MODE
605{
606 return _mm512_mullo_epi64( (*a).value, (*b).value );
607}
608#else
609= delete;
610#endif
611//*************************************************************************************************
612
613
614//*************************************************************************************************
// Multiplication of a 64-bit signed integral complex SIMD vector (left) with a 64-bit signed
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with
// _mm512_mullo_epi64; no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Only available in AVX-512DQ mode; otherwise the operator is deleted.
625 operator*( const SIMDcint64& a, const SIMDint64& b ) noexcept
626#if BLAZE_AVX512DQ_MODE
627{
628 return _mm512_mullo_epi64( (*a).value, (*b).value );
629}
630#else
631= delete;
632#endif
633//*************************************************************************************************
634
635
636//*************************************************************************************************
// Multiplication of a 64-bit unsigned integral complex SIMD vector (left) with a 64-bit unsigned
// integral SIMD vector (right). Both raw registers are multiplied lane by lane with
// _mm512_mullo_epi64; no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Only available in AVX-512DQ mode; otherwise the operator is deleted.
647 operator*( const SIMDcuint64& a, const SIMDuint64& b ) noexcept
648#if BLAZE_AVX512DQ_MODE
649{
650 return _mm512_mullo_epi64( (*a).value, (*b).value );
651}
652#else
653= delete;
654#endif
655//*************************************************************************************************
656
657
658//*************************************************************************************************
// Multiplication of a 64-bit signed integral SIMD vector (left) with a 64-bit signed integral
// complex SIMD vector (right). Both raw registers are multiplied lane by lane with
// _mm512_mullo_epi64; no real/imaginary shuffling is performed.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
//
// This overload is intentionally a plain (non-template) function. Both parameters are concrete
// types, so a `template< typename T1, typename T2 >` header would introduce parameters that
// cannot be deduced from the call arguments, making the overload unreachable for `a * b`.
// Only available in AVX-512DQ mode; otherwise the operator is deleted.
671 operator*( const SIMDint64& a, const SIMDcint64& b ) noexcept
672#if BLAZE_AVX512DQ_MODE
673{
674 return _mm512_mullo_epi64( (*a).value, (*b).value );
675}
676#else
677= delete;
678#endif
679//*************************************************************************************************
680
681
682//*************************************************************************************************
// Multiplication of a 64-bit unsigned integral SIMD vector (left) with a 64-bit unsigned integral
// complex SIMD vector (right). Both raw registers are multiplied lane by lane with
// _mm512_mullo_epi64; no real/imaginary shuffling is performed.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
//
// This overload is intentionally a plain (non-template) function. Both parameters are concrete
// types, so a `template< typename T1, typename T2 >` header would introduce parameters that
// cannot be deduced from the call arguments, making the overload unreachable for `a * b`.
//
// NOTE(review): this is the only 64-bit integral overload in this listing whose guard also
// accepts BLAZE_MIC_MODE; all others require BLAZE_AVX512DQ_MODE alone. Confirm whether the
// MIC branch is intended before relying on it.
695 operator*( const SIMDuint64& a, const SIMDcuint64& b ) noexcept
696#if BLAZE_AVX512DQ_MODE || BLAZE_MIC_MODE
697{
698 return _mm512_mullo_epi64( (*a).value, (*b).value );
699}
700#else
701= delete;
702#endif
703//*************************************************************************************************
704
705
706//*************************************************************************************************
// Complex multiplication of two 64-bit integral complex SIMD vectors of the same type T.
// Assuming interleaved (real, imaginary) 64-bit lanes (TODO confirm against the SIMD type
// definitions), the result per complex element is
//    real = a.re*b.re - a.im*b.im      imag = a.re*b.im + a.im*b.re
// The 64-bit lanes are rearranged with 32-bit shuffles, i.e. each 64-bit value is moved as a
// pair of adjacent 32-bit halves. Only available in AVX-512DQ mode; otherwise deleted.
716template< typename T > // Type of both operands
718 operator*( const SIMDci64<T>& a, const SIMDci64<T>& b ) noexcept
719#if BLAZE_AVX512DQ_MODE
720{
   // 0b11'10'11'10 picks 32-bit words 2,3,2,3: the upper 64-bit lane of each 128-bit group twice.
721 const __m512i a_ii = _mm512_shuffle_epi32( (*a).value, 0b11'10'11'10 );
   // 0b01'00'11'10 picks 32-bit words 2,3,0,1: the two 64-bit lanes of each group swapped.
722 const __m512i b_ri = _mm512_shuffle_epi32( (*b).value, 0b01'00'11'10 );
   // 0b01'00'01'00 picks 32-bit words 0,1,0,1: the lower 64-bit lane of each 128-bit group twice.
723 const __m512i a_rr = _mm512_shuffle_epi32( (*a).value, 0b01'00'01'00 );
724
725 const __m512i a_rr_b = _mm512_mullo_epi64( a_rr, (*b).value );
726 const __m512i a_ii_b_ri = _mm512_mullo_epi64( a_ii, b_ri );
   // Mask 0b01010101 selects the even 64-bit lanes: there the value becomes 0 - a_ii_b_ri, all
   // other lanes keep a_ii_b_ri unchanged.
727 const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi64( a_ii_b_ri, 0b01010101,
728 _mm512_setzero_si512(), a_ii_b_ri );
729 return _mm512_add_epi64( a_rr_b, a_ii_b_ri_signed );
730}
731#else
732= delete;
733#endif
734//*************************************************************************************************
735
736
737
738
739//=================================================================================================
740//
741// 32-BIT FLOATING POINT SIMD TYPES
742//
743//=================================================================================================
744
745//*************************************************************************************************
// Expression object for the multiplication of two 32-bit floating point SIMD operands.
// The object stores copies of both operands and performs the multiplication only when eval() is
// called. It derives from SIMDf32< SIMDf32MultExpr<T1,T2> >, so it can itself be used wherever
// a SIMDf32 operand is accepted.
// NOTE(review): the class-head line and the `This` alias used below are missing from this
// listing -- confirm them in the header.
752template< typename T1 // Type of the left-hand side operand
753 , typename T2 > // Type of the right-hand side operand
755 : public SIMDf32< SIMDf32MultExpr<T1,T2> >
756{
757 //**Type definitions****************************************************************************
   // Base type of this SIMDf32MultExpr instance.
759 using BaseType = SIMDf32<This>;
760 //**********************************************************************************************
761
762 //**Constructor*********************************************************************************
   // Stores copies of the two operands; no computation happens here.
768 BLAZE_ALWAYS_INLINE SIMDf32MultExpr( const T1& a, const T2& b )
769 : a_( a ) // The left-hand side operand for the multiplication
770 , b_( b ) // The right-hand side operand for the multiplication
771 {}
772 //**********************************************************************************************
773
774 //**Evaluation function*************************************************************************
   // Evaluates both operands via their eval() member, multiplies the resulting registers
   // element-wise with the mul_ps intrinsic of the first enabled mode in the chain
   // (AVX-512F or MIC) -> AVX -> SSE and returns the product as SIMDfloat.
   // If none of these modes is enabled, the function is deleted.
779 BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept
780#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
781 {
782 return _mm512_mul_ps( a_.eval().value, b_.eval().value );
783 }
784#elif BLAZE_AVX_MODE
785 {
786 return _mm256_mul_ps( a_.eval().value, b_.eval().value );
787 }
788#elif BLAZE_SSE_MODE
789 {
790 return _mm_mul_ps( a_.eval().value, b_.eval().value );
791 }
792#else
793 = delete;
794#endif
795 //**********************************************************************************************
796
797 //**Member variables****************************************************************************
   // Both operands are held by value (const copies), not by reference.
798 const T1 a_;
799 const T2 b_;
800 //**********************************************************************************************
801};
802//*************************************************************************************************
803
804
805//*************************************************************************************************
// Multiplication of two 32-bit floating point SIMD operands.
// No arithmetic is performed here: the operator wraps both operands (after applying `*` to the
// SIMDf32 base references) in a SIMDf32MultExpr<T1,T2>, which multiplies when it is evaluated.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
815template< typename T1 // Type of the left-hand side operand
816 , typename T2 > // Type of the right-hand side operand
818 operator*( const SIMDf32<T1>& a, const SIMDf32<T2>& b ) noexcept
819{
820 return SIMDf32MultExpr<T1,T2>( *a, *b );
821}
822//*************************************************************************************************
823
824
825//*************************************************************************************************
// Multiplication of a single precision complex SIMD vector (left) with a single precision
// floating point SIMD vector (right). Both raw registers are multiplied lane by lane with
// mul_ps; no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX -> SSE, otherwise the operator is deleted.
836 operator*( const SIMDcfloat& a, const SIMDfloat& b ) noexcept
837#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
838{
839 return _mm512_mul_ps( a.value, b.value );
840}
841#elif BLAZE_AVX_MODE
842{
843 return _mm256_mul_ps( a.value, b.value );
844}
845#elif BLAZE_SSE_MODE
846{
847 return _mm_mul_ps( a.value, b.value );
848}
849#else
850= delete;
851#endif
852//*************************************************************************************************
853
854
855//*************************************************************************************************
// Multiplication of a single precision floating point SIMD vector (left) with a single precision
// complex SIMD vector (right); mirror image of the (complex, real) overload. Both raw registers
// are multiplied lane by lane with mul_ps without any shuffling.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX -> SSE, otherwise the operator is deleted.
866 operator*( const SIMDfloat& a, const SIMDcfloat& b ) noexcept
867#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
868{
869 return _mm512_mul_ps( a.value, b.value );
870}
871#elif BLAZE_AVX_MODE
872{
873 return _mm256_mul_ps( a.value, b.value );
874}
875#elif BLAZE_SSE_MODE
876{
877 return _mm_mul_ps( a.value, b.value );
878}
879#else
880= delete;
881#endif
882//*************************************************************************************************
883
884
885//*************************************************************************************************
// Complex multiplication of two single precision complex SIMD vectors.
// Assuming interleaved (real, imaginary) float lanes (TODO confirm against the SIMD type
// definitions), the result per complex element is
//    real = a.re*b.re - a.im*b.im      imag = a.re*b.im + a.im*b.re
// The alternating subtract/add is done by the fmaddsub/addsub intrinsics, which subtract in the
// even lanes and add in the odd lanes.
// Mode chain: AVX-512F -> AVX -> SSE3, otherwise the operator is deleted.
896 operator*( const SIMDcfloat& a, const SIMDcfloat& b ) noexcept
897#if BLAZE_AVX512F_MODE
898{
   // 0b11'11'01'01 duplicates the odd lanes, 0b10'11'00'01 swaps adjacent lanes and
   // 0b10'10'00'00 duplicates the even lanes within each group of four floats.
899 const __m512 a_ii = _mm512_permute_ps( a.value, 0b11'11'01'01 );
900 const __m512 b_ri = _mm512_permute_ps( b.value, 0b10'11'00'01 );
901 const __m512 a_rr = _mm512_permute_ps( a.value, 0b10'10'00'00 );
   // a_rr*b -/+ (a_ii*b_ri): fused multiply with alternating subtract (even) / add (odd).
902 return _mm512_fmaddsub_ps( a_rr, b.value, _mm512_mul_ps( a_ii, b_ri ) );
903}
904#elif BLAZE_AVX_MODE
905{
906 __m256 x, y, z;
907
   // 0xA0 duplicates the even lanes, 0xF5 duplicates the odd lanes, 0xB1 swaps adjacent lanes.
908 x = _mm256_shuffle_ps( a.value, a.value, 0xA0 );
909 z = _mm256_mul_ps( x, b.value );
910 x = _mm256_shuffle_ps( a.value, a.value, 0xF5 );
911 y = _mm256_shuffle_ps( b.value, b.value, 0xB1 );
912 y = _mm256_mul_ps( x, y );
913 return _mm256_addsub_ps( z, y );
914}
915#elif BLAZE_SSE3_MODE
916{
917 __m128 x, y, z;
918
   // Same sequence as the AVX branch, on 128-bit registers.
919 x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
920 z = _mm_mul_ps( x, b.value );
921 x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
922 y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
923 y = _mm_mul_ps( x, y );
924 return _mm_addsub_ps( z, y );
925}
926#else
927= delete;
928#endif
929//*************************************************************************************************
930
931
932
933
934//=================================================================================================
935//
936// 64-BIT FLOATING POINT SIMD TYPES
937//
938//=================================================================================================
939
940//*************************************************************************************************
// Expression object for the multiplication of two 64-bit floating point SIMD operands.
// The object stores copies of both operands and performs the multiplication only when eval() is
// called. It derives from SIMDf64< SIMDf64MultExpr<T1,T2> >, so it can itself be used wherever
// a SIMDf64 operand is accepted.
// NOTE(review): the class-head line and the `This` alias used below are missing from this
// listing -- confirm them in the header.
947template< typename T1 // Type of the left-hand side operand
948 , typename T2 > // Type of the right-hand side operand
950 : public SIMDf64< SIMDf64MultExpr<T1,T2> >
951{
952 //**Type definitions****************************************************************************
   // Base type of this SIMDf64MultExpr instance.
954 using BaseType = SIMDf64<This>;
955 //**********************************************************************************************
956
957 //**Constructor*********************************************************************************
   // Stores copies of the two operands; no computation happens here.
963 BLAZE_ALWAYS_INLINE SIMDf64MultExpr( const T1& a, const T2& b )
964 : a_( a ) // The left-hand side operand for the multiplication
965 , b_( b ) // The right-hand side operand for the multiplication
966 {}
967 //**********************************************************************************************
968
969 //**Evaluation function*************************************************************************
   // Evaluates both operands via their eval() member, multiplies the resulting registers
   // element-wise with the mul_pd intrinsic of the first enabled mode in the chain
   // (AVX-512F or MIC) -> AVX -> SSE2 and returns the product as SIMDdouble.
   // If none of these modes is enabled, the function is deleted.
974 BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept
975#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
976 {
977 return _mm512_mul_pd( a_.eval().value, b_.eval().value );
978 }
979#elif BLAZE_AVX_MODE
980 {
981 return _mm256_mul_pd( a_.eval().value, b_.eval().value );
982 }
983#elif BLAZE_SSE2_MODE
984 {
985 return _mm_mul_pd( a_.eval().value, b_.eval().value );
986 }
987#else
988 = delete;
989#endif
990 //**********************************************************************************************
991
992 //**Member variables****************************************************************************
   // Both operands are held by value (const copies), not by reference.
993 const T1 a_;
994 const T2 b_;
995 //**********************************************************************************************
996};
997//*************************************************************************************************
998
999
1000//*************************************************************************************************
// Multiplication of two 64-bit floating point SIMD operands.
// No arithmetic is performed here: the operator wraps both operands (after applying `*` to the
// SIMDf64 base references) in a SIMDf64MultExpr<T1,T2>, which multiplies when it is evaluated.
// NOTE(review): the return-type line is missing from this listing -- confirm it in the header.
1010template< typename T1 // Type of the left-hand side operand
1011 , typename T2 > // Type of the right-hand side operand
1013 operator*( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept
1014{
1015 return SIMDf64MultExpr<T1,T2>( *a, *b );
1016}
1017//*************************************************************************************************
1018
1019
1020//*************************************************************************************************
// Multiplication of a double precision complex SIMD vector (left) with a double precision
// floating point SIMD vector (right). Both raw registers are multiplied lane by lane with
// mul_pd; no real/imaginary shuffling is performed.
// NOTE(review): presumably b is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX -> SSE2, otherwise the operator is deleted.
1031 operator*( const SIMDcdouble& a, const SIMDdouble& b ) noexcept
1032#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
1033{
1034 return _mm512_mul_pd( a.value, b.value );
1035}
1036#elif BLAZE_AVX_MODE
1037{
1038 return _mm256_mul_pd( a.value, b.value );
1039}
1040#elif BLAZE_SSE2_MODE
1041{
1042 return _mm_mul_pd( a.value, b.value );
1043}
1044#else
1045= delete;
1046#endif
1047//*************************************************************************************************
1048
1049
1050//*************************************************************************************************
// Multiplication of a double precision floating point SIMD vector (left) with a double precision
// complex SIMD vector (right); mirror image of the (complex, real) overload. Both raw registers
// are multiplied lane by lane with mul_pd without any shuffling.
// NOTE(review): presumably a is expected to hold the same factor in both lanes of each
// real/imaginary pair -- confirm against the callers.
// Mode chain: (AVX-512F or MIC) -> AVX -> SSE2, otherwise the operator is deleted.
1061 operator*( const SIMDdouble& a, const SIMDcdouble& b ) noexcept
1062#if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
1063{
1064 return _mm512_mul_pd( a.value, b.value );
1065}
1066#elif BLAZE_AVX_MODE
1067{
1068 return _mm256_mul_pd( a.value, b.value );
1069}
1070#elif BLAZE_SSE2_MODE
1071{
1072 return _mm_mul_pd( a.value, b.value );
1073}
1074#else
1075= delete;
1076#endif
1077//*************************************************************************************************
1078
1079
1080//*************************************************************************************************
// Complex multiplication of two double precision complex SIMD vectors.
// Assuming interleaved (real, imaginary) double lanes (TODO confirm against the SIMD type
// definitions), the result per complex element is
//    real = a.re*b.re - a.im*b.im      imag = a.re*b.im + a.im*b.re
// The alternating subtract/add is done by the fmaddsub/addsub intrinsics, which subtract in the
// even lanes and add in the odd lanes.
// Mode chain: AVX-512F -> AVX -> SSE3, otherwise the operator is deleted.
1091 operator*( const SIMDcdouble& a, const SIMDcdouble& b ) noexcept
1092#if BLAZE_AVX512F_MODE
1093{
   // One selector bit per lane: all ones duplicates the upper (odd) lane of every pair,
   // 0b01010101 swaps the two lanes of every pair, 0 duplicates the lower (even) lane.
1094 const __m512d a_ii = _mm512_permute_pd( a.value, 0b1'1'1'1'1'1'1'1 );
1095 const __m512d b_ri = _mm512_permute_pd( b.value, 0b0'1'0'1'0'1'0'1 );
1096 const __m512d a_rr = _mm512_permute_pd( a.value, 0 );
   // a_rr*b -/+ (a_ii*b_ri): fused multiply with alternating subtract (even) / add (odd).
1097 return _mm512_fmaddsub_pd( a_rr, b.value, _mm512_mul_pd( a_ii, b_ri ) );
1098}
1099#elif BLAZE_AVX_MODE
1100{
1101 __m256d x, y, z;
1102
   // Selector 0 duplicates the even lanes, 15 duplicates the odd lanes, 5 swaps lane pairs.
1103 x = _mm256_shuffle_pd( a.value, a.value, 0 );
1104 z = _mm256_mul_pd( x, b.value );
1105 x = _mm256_shuffle_pd( a.value, a.value, 15 );
1106 y = _mm256_shuffle_pd( b.value, b.value, 5 );
1107 y = _mm256_mul_pd( x, y );
1108 return _mm256_addsub_pd( z, y );
1109}
1110#elif BLAZE_SSE3_MODE
1111{
1112 __m128d x, y, z;
1113
   // Selector 0 duplicates the low lane, 3 duplicates the high lane, 1 swaps the two lanes.
1114 x = _mm_shuffle_pd( a.value, a.value, 0 );
1115 z = _mm_mul_pd( x, b.value );
1116 x = _mm_shuffle_pd( a.value, a.value, 3 );
1117 y = _mm_shuffle_pd( b.value, b.value, 1 );
1118 y = _mm_mul_pd( x, y );
1119 return _mm_addsub_pd( z, y );
1120}
1121#else
1122= delete;
1123#endif
1124//*************************************************************************************************
1125
1126} // namespace blaze
1127
1128#endif
Header file for the basic SIMD types.
SIMD type for 64-bit double precision complex values.
SIMD type for 32-bit single precision complex values.
SIMD type for 16-bit signed integral complex values.
SIMD type for 32-bit signed integral complex values.
SIMD type for 64-bit signed integral complex values.
SIMD type for 16-bit unsigned integral complex values.
SIMD type for 32-bit unsigned integral complex values.
SIMD type for 64-bit unsigned integral complex values.
SIMD type for 64-bit double precision floating point data values.
SIMD type for 32-bit single precision floating point data values.
SIMD type for 16-bit signed integral data values.
SIMD type for 32-bit signed integral data values.
SIMD type for 64-bit signed integral data values.
SIMD type for 16-bit unsigned integral data values.
SIMD type for 32-bit unsigned integral data values.
SIMD type for 64-bit unsigned integral data values.
BLAZE_ALWAYS_INLINE const SIMDcdouble operator*(const SIMDcdouble &a, const SIMDcdouble &b) noexcept=delete
Multiplication of two vectors of double precision complex SIMD values.
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
Expression object for 32-bit floating point multiplication operations.
Definition: Mult.h:756
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:798
SIMDf32< This > BaseType
Base type of this SIMDf32MultExpr instance.
Definition: Mult.h:759
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE SIMDf32MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf32MultExpr class.
Definition: Mult.h:768
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:799
Expression object for 64-bit floating point multiplication operations.
Definition: Mult.h:951
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:993
SIMDf64< This > BaseType
Base type of this SIMDf64MultExpr instance.
Definition: Mult.h:954
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:994
BLAZE_ALWAYS_INLINE SIMDf64MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf64MultExpr class.
Definition: Mult.h:963
System settings for the inline keywords.
System settings for the SSE mode.