Mult.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_MULT_H_
36 #define _BLAZE_MATH_SIMD_MULT_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 
47 
48 namespace blaze {
49 
50 //=================================================================================================
51 //
52 // 16-BIT INTEGRAL SIMD TYPES
53 //
54 //=================================================================================================
55 
56 //*************************************************************************************************
66 template< typename T > // Type of both operands
67 BLAZE_ALWAYS_INLINE const T
68  operator*( const SIMDi16<T>& a, const SIMDi16<T>& b ) noexcept
69 #if BLAZE_AVX512BW_MODE
70 {
71  return _mm512_mullo_epi16( (~a).value, (~b).value );
72 }
73 #elif BLAZE_AVX2_MODE
74 {
75  return _mm256_mullo_epi16( (~a).value, (~b).value );
76 }
77 #elif BLAZE_SSE2_MODE
78 {
79  return _mm_mullo_epi16( (~a).value, (~b).value );
80 }
81 #else
82 = delete;
83 #endif
84 //*************************************************************************************************
85 
86 
87 //*************************************************************************************************
97 template< typename T1 // Type of the left-hand side operand
98  , typename T2 > // Type of the right-hand side operand
99 BLAZE_ALWAYS_INLINE const SIMDuint16
100  operator*( const SIMDi16<T1>& a, const SIMDi16<T2>& b ) noexcept
101 #if BLAZE_AVX512BW_MODE
102 {
103  return _mm512_mullo_epi16( (~a).value, (~b).value );
104 }
105 #elif BLAZE_AVX2_MODE
106 {
107  return _mm256_mullo_epi16( (~a).value, (~b).value );
108 }
109 #elif BLAZE_SSE2_MODE
110 {
111  return _mm_mullo_epi16( (~a).value, (~b).value );
112 }
113 #else
114 = delete;
115 #endif
116 //*************************************************************************************************
117 
118 
119 //*************************************************************************************************
129 BLAZE_ALWAYS_INLINE const SIMDcint16
130  operator*( const SIMDcint16& a, const SIMDint16& b ) noexcept
131 #if BLAZE_AVX512BW_MODE
132 {
133  return _mm512_mullo_epi16( (~a).value, (~b).value );
134 }
135 #elif BLAZE_AVX2_MODE
136 {
137  return _mm256_mullo_epi16( (~a).value, (~b).value );
138 }
139 #elif BLAZE_SSE2_MODE
140 {
141  return _mm_mullo_epi16( (~a).value, (~b).value );
142 }
143 #else
144 = delete;
145 #endif
146 //*************************************************************************************************
147 
148 
149 //*************************************************************************************************
159 BLAZE_ALWAYS_INLINE const SIMDcuint16
160  operator*( const SIMDcuint16& a, const SIMDuint16& b ) noexcept
161 #if BLAZE_AVX512BW_MODE
162 {
163  return _mm512_mullo_epi16( (~a).value, (~b).value );
164 }
165 #elif BLAZE_AVX2_MODE
166 {
167  return _mm256_mullo_epi16( (~a).value, (~b).value );
168 }
169 #elif BLAZE_SSE2_MODE
170 {
171  return _mm_mullo_epi16( (~a).value, (~b).value );
172 }
173 #else
174 = delete;
175 #endif
176 //*************************************************************************************************
177 
178 
179 //*************************************************************************************************
189 BLAZE_ALWAYS_INLINE const SIMDcint16
190  operator*( const SIMDint16& a, const SIMDcint16& b ) noexcept
191 #if BLAZE_AVX512BW_MODE
192 {
193  return _mm512_mullo_epi16( (~a).value, (~b).value );
194 }
195 #elif BLAZE_AVX2_MODE
196 {
197  return _mm256_mullo_epi16( (~a).value, (~b).value );
198 }
199 #elif BLAZE_SSE2_MODE
200 {
201  return _mm_mullo_epi16( (~a).value, (~b).value );
202 }
203 #else
204 = delete;
205 #endif
206 //*************************************************************************************************
207 
208 
209 //*************************************************************************************************
219 BLAZE_ALWAYS_INLINE const SIMDcuint16
220  operator*( const SIMDuint16& a, const SIMDcuint16& b ) noexcept
221 #if BLAZE_AVX512BW_MODE
222 {
223  return _mm512_mullo_epi16( (~a).value, (~b).value );
224 }
225 #elif BLAZE_AVX2_MODE
226 {
227  return _mm256_mullo_epi16( (~a).value, (~b).value );
228 }
229 #elif BLAZE_SSE2_MODE
230 {
231  return _mm_mullo_epi16( (~a).value, (~b).value );
232 }
233 #else
234 = delete;
235 #endif
236 //*************************************************************************************************
237 
238 
239 //*************************************************************************************************
249 template< typename T > // Type of both operands
250 BLAZE_ALWAYS_INLINE const T
251  operator*( const SIMDci16<T>& a, const SIMDci16<T>& b ) noexcept
252 #if BLAZE_AVX512BW_MODE
253 {
254  __m512i a_ii = _mm512_shufflelo_epi16( (~a).value, 0b11'11'01'01 );
255  a_ii = _mm512_shufflehi_epi16( a_ii, 0b11'11'01'01 );
256 
257  __m512i b_ri = _mm512_shufflelo_epi16( (~b).value, 0b10'11'00'01 );
258  b_ri = _mm512_shufflehi_epi16( b_ri, 0b10'11'00'01 );
259 
260  __m512i a_rr = _mm512_shufflelo_epi16( (~a).value, 0b10'10'00'00 );
261  a_rr = _mm512_shufflehi_epi16( a_rr, 0b10'10'00'00 );
262 
263  const __m512i a_rr_b = _mm512_mullo_epi16( a_rr, (~b).value );
264  const __m512i a_ii_b_ri = _mm512_mullo_epi16( a_ii, b_ri );
265  const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi16( a_ii_b_ri, 0x55555555,
266  _mm512_setzero_si512(), a_ii_b_ri );
267  return _mm512_add_epi16( a_rr_b, a_ii_b_ri_signed );
268 }
269 #elif BLAZE_AVX2_MODE
270 {
271  __m256i x, y, z;
272  const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
273 
274  x = _mm256_shufflelo_epi16( (~a).value, 0xA0 );
275  x = _mm256_shufflehi_epi16( x, 0xA0 );
276  z = _mm256_mullo_epi16( x, (~b).value );
277  x = _mm256_shufflelo_epi16( (~a).value, 0xF5 );
278  x = _mm256_shufflehi_epi16( x, 0xF5 );
279  y = _mm256_shufflelo_epi16( (~b).value, 0xB1 );
280  y = _mm256_shufflehi_epi16( y, 0xB1 );
281  y = _mm256_mullo_epi16( x, y );
282  y = _mm256_mullo_epi16( y, neg );
283  return _mm256_add_epi16( z, y );
284 }
285 #elif BLAZE_SSE2_MODE
286 {
287  __m128i x, y, z;
288  const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );
289 
290  x = _mm_shufflelo_epi16( (~a).value, 0xA0 );
291  x = _mm_shufflehi_epi16( x, 0xA0 );
292  z = _mm_mullo_epi16( x, (~b).value );
293  x = _mm_shufflelo_epi16( (~a).value, 0xF5 );
294  x = _mm_shufflehi_epi16( x, 0xF5 );
295  y = _mm_shufflelo_epi16( (~b).value, 0xB1 );
296  y = _mm_shufflehi_epi16( y, 0xB1 );
297  y = _mm_mullo_epi16( x, y );
298  y = _mm_mullo_epi16( y, neg );
299  return _mm_add_epi16( z, y );
300 }
301 #else
302 = delete;
303 #endif
304 //*************************************************************************************************
305 
306 
307 
308 
309 //=================================================================================================
310 //
311 // 32-BIT INTEGRAL SIMD TYPES
312 //
313 //=================================================================================================
314 
315 //*************************************************************************************************
325 template< typename T > // Type of both operands
326 BLAZE_ALWAYS_INLINE const T
327  operator*( const SIMDi32<T>& a, const SIMDi32<T>& b ) noexcept
328 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
329 {
330  return _mm512_mullo_epi32( (~a).value, (~b).value );
331 }
332 #elif BLAZE_AVX2_MODE
333 {
334  return _mm256_mullo_epi32( (~a).value, (~b).value );
335 }
336 #elif BLAZE_SSE4_MODE
337 {
338  return _mm_mullo_epi32( (~a).value, (~b).value );
339 }
340 #else
341 = delete;
342 #endif
343 //*************************************************************************************************
344 
345 
346 //*************************************************************************************************
356 template< typename T1 // Type of the left-hand side operand
357  , typename T2 > // Type of the right-hand side operand
358 BLAZE_ALWAYS_INLINE const SIMDuint32
359  operator*( const SIMDi32<T1>& a, const SIMDi32<T2>& b ) noexcept
360 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
361 {
362  return _mm512_mullo_epi32( (~a).value, (~b).value );
363 }
364 #elif BLAZE_AVX2_MODE
365 {
366  return _mm256_mullo_epi32( (~a).value, (~b).value );
367 }
368 #elif BLAZE_SSE4_MODE
369 {
370  return _mm_mullo_epi32( (~a).value, (~b).value );
371 }
372 #else
373 = delete;
374 #endif
375 //*************************************************************************************************
376 
377 
378 //*************************************************************************************************
388 BLAZE_ALWAYS_INLINE const SIMDcint32
389  operator*( const SIMDcint32& a, const SIMDint32& b ) noexcept
390 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
391 {
392  return _mm512_mullo_epi32( (~a).value, (~b).value );
393 }
394 #elif BLAZE_AVX2_MODE
395 {
396  return _mm256_mullo_epi32( (~a).value, (~b).value );
397 }
398 #elif BLAZE_SSE4_MODE
399 {
400  return _mm_mullo_epi32( (~a).value, (~b).value );
401 }
402 #else
403 = delete;
404 #endif
405 //*************************************************************************************************
406 
407 
408 //*************************************************************************************************
418 BLAZE_ALWAYS_INLINE const SIMDcuint32
419  operator*( const SIMDcuint32& a, const SIMDuint32& b ) noexcept
420 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
421 {
422  return _mm512_mullo_epi32( (~a).value, (~b).value );
423 }
424 #elif BLAZE_AVX2_MODE
425 {
426  return _mm256_mullo_epi32( (~a).value, (~b).value );
427 }
428 #elif BLAZE_SSE4_MODE
429 {
430  return _mm_mullo_epi32( (~a).value, (~b).value );
431 }
432 #else
433 = delete;
434 #endif
435 //*************************************************************************************************
436 
437 
438 //*************************************************************************************************
448 template< typename T1 // Type of the left-hand side operand
449  , typename T2 > // Type of the right-hand side operand
450 BLAZE_ALWAYS_INLINE const SIMDcint32
451  operator*( const SIMDint32& a, const SIMDcint32& b ) noexcept
452 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
453 {
454  return _mm512_mullo_epi32( (~a).value, (~b).value );
455 }
456 #elif BLAZE_AVX2_MODE
457 {
458  return _mm256_mullo_epi32( (~a).value, (~b).value );
459 }
460 #elif BLAZE_SSE4_MODE
461 {
462  return _mm_mullo_epi32( (~a).value, (~b).value );
463 }
464 #else
465 = delete;
466 #endif
467 //*************************************************************************************************
468 
469 
470 //*************************************************************************************************
480 template< typename T1 // Type of the left-hand side operand
481  , typename T2 > // Type of the right-hand side operand
482 BLAZE_ALWAYS_INLINE const SIMDcuint32
483  operator*( const SIMDuint32& a, const SIMDcuint32& b ) noexcept
484 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
485 {
486  return _mm512_mullo_epi32( (~a).value, (~b).value );
487 }
488 #elif BLAZE_AVX2_MODE
489 {
490  return _mm256_mullo_epi32( (~a).value, (~b).value );
491 }
492 #elif BLAZE_SSE4_MODE
493 {
494  return _mm_mullo_epi32( (~a).value, (~b).value );
495 }
496 #else
497 = delete;
498 #endif
499 //*************************************************************************************************
500 
501 
502 //*************************************************************************************************
512 template< typename T > // Type of both operands
513 BLAZE_ALWAYS_INLINE const T
514  operator*( const SIMDci32<T>& a, const SIMDci32<T>& b ) noexcept
515 #if BLAZE_AVX512F_MODE
516 {
517  const __m512i a_ii = _mm512_shuffle_epi32( (~a).value, _MM_PERM_DDBB );
518  const __m512i b_ri = _mm512_shuffle_epi32( (~b).value, _MM_PERM_CDAB );
519  const __m512i a_rr = _mm512_shuffle_epi32( (~a).value, _MM_PERM_CCAA );
520 
521  const __m512i a_rr_b = _mm512_mullo_epi32( a_rr, (~b).value );
522  const __m512i a_ii_b_ri = _mm512_mullo_epi32( a_ii, b_ri );
523  const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi32( a_ii_b_ri, 0b0101010101010101,
524  _mm512_setzero_si512(), a_ii_b_ri );
525  return _mm512_add_epi32( a_rr_b, a_ii_b_ri_signed );
526 }
527 #elif BLAZE_AVX2_MODE
528 {
529  __m256i x, y, z;
530  const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) );
531 
532  x = _mm256_shuffle_epi32( (~a).value, 0xA0 );
533  z = _mm256_mullo_epi32( x, (~b).value );
534  x = _mm256_shuffle_epi32( (~a).value, 0xF5 );
535  y = _mm256_shuffle_epi32( (~b).value, 0xB1 );
536  y = _mm256_mullo_epi32( x, y );
537  y = _mm256_mullo_epi32( y, neg );
538  return _mm256_add_epi32( z, y );
539 }
540 #elif BLAZE_SSE4_MODE
541 {
542  __m128i x, y, z;
543  const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) );
544 
545  x = _mm_shuffle_epi32( (~a).value, 0xA0 );
546  z = _mm_mullo_epi32( x, (~b).value );
547  x = _mm_shuffle_epi32( (~a).value, 0xF5 );
548  y = _mm_shuffle_epi32( (~b).value, 0xB1 );
549  y = _mm_mullo_epi32( x, y );
550  y = _mm_mullo_epi32( y, neg );
551  return _mm_add_epi32( z, y );
552 }
553 #else
554 = delete;
555 #endif
556 //*************************************************************************************************
557 
558 
559 
560 
561 //=================================================================================================
562 //
563 // 64-BIT INTEGRAL SIMD TYPES
564 //
565 //=================================================================================================
566 
567 //*************************************************************************************************
577 template< typename T > // Type of both operands
578 BLAZE_ALWAYS_INLINE const T
579  operator*( const SIMDi64<T>& a, const SIMDi64<T>& b ) noexcept
580 #if BLAZE_AVX512DQ_MODE
581 {
582  return _mm512_mullo_epi64( (~a).value, (~b).value );
583 }
584 #else
585 = delete;
586 #endif
587 //*************************************************************************************************
588 
589 
590 //*************************************************************************************************
600 template< typename T1 // Type of the left-hand side operand
601  , typename T2 > // Type of the right-hand side operand
602 BLAZE_ALWAYS_INLINE const SIMDuint64
603  operator*( const SIMDi64<T1>& a, const SIMDi64<T2>& b ) noexcept
604 #if BLAZE_AVX512DQ_MODE
605 {
606  return _mm512_mullo_epi64( (~a).value, (~b).value );
607 }
608 #else
609 = delete;
610 #endif
611 //*************************************************************************************************
612 
613 
614 //*************************************************************************************************
624 BLAZE_ALWAYS_INLINE const SIMDcint64
625  operator*( const SIMDcint64& a, const SIMDint64& b ) noexcept
626 #if BLAZE_AVX512DQ_MODE
627 {
628  return _mm512_mullo_epi64( (~a).value, (~b).value );
629 }
630 #else
631 = delete;
632 #endif
633 //*************************************************************************************************
634 
635 
636 //*************************************************************************************************
646 BLAZE_ALWAYS_INLINE const SIMDcuint64
647  operator*( const SIMDcuint64& a, const SIMDuint64& b ) noexcept
648 #if BLAZE_AVX512DQ_MODE
649 {
650  return _mm512_mullo_epi64( (~a).value, (~b).value );
651 }
652 #else
653 = delete;
654 #endif
655 //*************************************************************************************************
656 
657 
658 //*************************************************************************************************
668 template< typename T1 // Type of the left-hand side operand
669  , typename T2 > // Type of the right-hand side operand
670 BLAZE_ALWAYS_INLINE const SIMDcint64
671  operator*( const SIMDint64& a, const SIMDcint64& b ) noexcept
672 #if BLAZE_AVX512DQ_MODE
673 {
674  return _mm512_mullo_epi64( (~a).value, (~b).value );
675 }
676 #else
677 = delete;
678 #endif
679 //*************************************************************************************************
680 
681 
682 //*************************************************************************************************
692 template< typename T1 // Type of the left-hand side operand
693  , typename T2 > // Type of the right-hand side operand
694 BLAZE_ALWAYS_INLINE const SIMDcuint64
695  operator*( const SIMDuint64& a, const SIMDcuint64& b ) noexcept
696 #if BLAZE_AVX512DQ_MODE || BLAZE_MIC_MODE
697 {
698  return _mm512_mullo_epi64( (~a).value, (~b).value );
699 }
700 #else
701 = delete;
702 #endif
703 //*************************************************************************************************
704 
705 
706 //*************************************************************************************************
716 template< typename T > // Type of both operands
717 BLAZE_ALWAYS_INLINE const T
718  operator*( const SIMDci64<T>& a, const SIMDci64<T>& b ) noexcept
719 #if BLAZE_AVX512DQ_MODE
720 {
721  const __m512i a_ii = _mm512_shuffle_epi32( (~a).value, 0b11'10'11'10 );
722  const __m512i b_ri = _mm512_shuffle_epi32( (~b).value, 0b01'00'11'10 );
723  const __m512i a_rr = _mm512_shuffle_epi32( (~a).value, 0b01'00'01'00 );
724 
725  const __m512i a_rr_b = _mm512_mullo_epi64( a_rr, (~b).value );
726  const __m512i a_ii_b_ri = _mm512_mullo_epi64( a_ii, b_ri );
727  const __m512i a_ii_b_ri_signed = _mm512_mask_sub_epi64( a_ii_b_ri, 0b01010101,
728  _mm512_setzero_si512(), a_ii_b_ri );
729  return _mm512_add_epi64( a_rr_b, a_ii_b_ri_signed );
730 }
731 #else
732 = delete;
733 #endif
734 //*************************************************************************************************
735 
736 
737 
738 
739 //=================================================================================================
740 //
741 // 32-BIT FLOATING POINT SIMD TYPES
742 //
743 //=================================================================================================
744 
745 //*************************************************************************************************
752 template< typename T1 // Type of the left-hand side operand
753  , typename T2 > // Type of the right-hand side operand
754 struct SIMDf32MultExpr
755  : public SIMDf32< SIMDf32MultExpr<T1,T2> >
756 {
757  //**Type definitions****************************************************************************
758  using This = SIMDf32MultExpr<T1,T2>; //!< Type of this SIMDf32MultExpr instance.
759  using BaseType = SIMDf32<This>; //!< Base type of this SIMDf32MultExpr instance.
760  //**********************************************************************************************
761 
762  //**Constructor*********************************************************************************
768  explicit BLAZE_ALWAYS_INLINE SIMDf32MultExpr( const T1& a, const T2& b )
769  : a_( a ) // The left-hand side operand for the multiplication
770  , b_( b ) // The right-hand side operand for the multiplication
771  {}
772  //**********************************************************************************************
773 
774  //**Evaluation function*************************************************************************
779  BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept
780 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
781  {
782  return _mm512_mul_ps( a_.eval().value, b_.eval().value );
783  }
784 #elif BLAZE_AVX_MODE
785  {
786  return _mm256_mul_ps( a_.eval().value, b_.eval().value );
787  }
788 #elif BLAZE_SSE_MODE
789  {
790  return _mm_mul_ps( a_.eval().value, b_.eval().value );
791  }
792 #else
793  = delete;
794 #endif
795  //**********************************************************************************************
796 
797  //**Member variables****************************************************************************
798  const T1 a_; //!< The left-hand side operand for the multiplication.
799  const T2 b_; //!< The right-hand side operand for the multiplication.
800  //**********************************************************************************************
801 };
802 //*************************************************************************************************
803 
804 
805 //*************************************************************************************************
815 template< typename T1 // Type of the left-hand side operand
816  , typename T2 > // Type of the right-hand side operand
817 BLAZE_ALWAYS_INLINE const SIMDf32MultExpr<T1,T2>
818  operator*( const SIMDf32<T1>& a, const SIMDf32<T2>& b ) noexcept
819 {
820  return SIMDf32MultExpr<T1,T2>( ~a, ~b );
821 }
822 //*************************************************************************************************
823 
824 
825 //*************************************************************************************************
835 BLAZE_ALWAYS_INLINE const SIMDcfloat
836  operator*( const SIMDcfloat& a, const SIMDfloat& b ) noexcept
837 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
838 {
839  return _mm512_mul_ps( a.value, b.value );
840 }
841 #elif BLAZE_AVX_MODE
842 {
843  return _mm256_mul_ps( a.value, b.value );
844 }
845 #elif BLAZE_SSE_MODE
846 {
847  return _mm_mul_ps( a.value, b.value );
848 }
849 #else
850 = delete;
851 #endif
852 //*************************************************************************************************
853 
854 
855 //*************************************************************************************************
865 BLAZE_ALWAYS_INLINE const SIMDcfloat
866  operator*( const SIMDfloat& a, const SIMDcfloat& b ) noexcept
867 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
868 {
869  return _mm512_mul_ps( a.value, b.value );
870 }
871 #elif BLAZE_AVX_MODE
872 {
873  return _mm256_mul_ps( a.value, b.value );
874 }
875 #elif BLAZE_SSE_MODE
876 {
877  return _mm_mul_ps( a.value, b.value );
878 }
879 #else
880 = delete;
881 #endif
882 //*************************************************************************************************
883 
884 
885 //*************************************************************************************************
895 BLAZE_ALWAYS_INLINE const SIMDcfloat
896  operator*( const SIMDcfloat& a, const SIMDcfloat& b ) noexcept
897 #if BLAZE_AVX512F_MODE
898 {
899  const __m512 a_ii = _mm512_permute_ps( a.value, 0b11'11'01'01 );
900  const __m512 b_ri = _mm512_permute_ps( b.value, 0b10'11'00'01 );
901  const __m512 a_rr = _mm512_permute_ps( a.value, 0b10'10'00'00 );
902  return _mm512_fmaddsub_ps( a_rr, b.value, _mm512_mul_ps( a_ii, b_ri ) );
903 }
904 #elif BLAZE_AVX_MODE
905 {
906  __m256 x, y, z;
907 
908  x = _mm256_shuffle_ps( a.value, a.value, 0xA0 );
909  z = _mm256_mul_ps( x, b.value );
910  x = _mm256_shuffle_ps( a.value, a.value, 0xF5 );
911  y = _mm256_shuffle_ps( b.value, b.value, 0xB1 );
912  y = _mm256_mul_ps( x, y );
913  return _mm256_addsub_ps( z, y );
914 }
915 #elif BLAZE_SSE3_MODE
916 {
917  __m128 x, y, z;
918 
919  x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
920  z = _mm_mul_ps( x, b.value );
921  x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
922  y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
923  y = _mm_mul_ps( x, y );
924  return _mm_addsub_ps( z, y );
925 }
926 #else
927 = delete;
928 #endif
929 //*************************************************************************************************
930 
931 
932 
933 
934 //=================================================================================================
935 //
936 // 64-BIT FLOATING POINT SIMD TYPES
937 //
938 //=================================================================================================
939 
940 //*************************************************************************************************
947 template< typename T1 // Type of the left-hand side operand
948  , typename T2 > // Type of the right-hand side operand
950  : public SIMDf64< SIMDf64MultExpr<T1,T2> >
951 {
952  //**Type definitions****************************************************************************
954  using BaseType = SIMDf64<This>;
955  //**********************************************************************************************
956 
957  //**Constructor*********************************************************************************
963  explicit BLAZE_ALWAYS_INLINE SIMDf64MultExpr( const T1& a, const T2& b )
964  : a_( a ) // The left-hand side operand for the multiplication
965  , b_( b ) // The right-hand side operand for the multiplication
966  {}
967  //**********************************************************************************************
968 
969  //**Evaluation function*************************************************************************
974  BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept
975 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
976  {
977  return _mm512_mul_pd( a_.eval().value, b_.eval().value );
978  }
979 #elif BLAZE_AVX_MODE
980  {
981  return _mm256_mul_pd( a_.eval().value, b_.eval().value );
982  }
983 #elif BLAZE_SSE2_MODE
984  {
985  return _mm_mul_pd( a_.eval().value, b_.eval().value );
986  }
987 #else
988  = delete;
989 #endif
990  //**********************************************************************************************
991 
992  //**Member variables****************************************************************************
993  const T1 a_;
994  const T2 b_;
995  //**********************************************************************************************
996 };
997 //*************************************************************************************************
998 
999 
1000 //*************************************************************************************************
1010 template< typename T1 // Type of the left-hand side operand
1011  , typename T2 > // Type of the right-hand side operand
1013  operator*( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept
1014 {
1015  return SIMDf64MultExpr<T1,T2>( ~a, ~b );
1016 }
1017 //*************************************************************************************************
1018 
1019 
1020 //*************************************************************************************************
1030 BLAZE_ALWAYS_INLINE const SIMDcdouble
1031  operator*( const SIMDcdouble& a, const SIMDdouble& b ) noexcept
1032 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
1033 {
1034  return _mm512_mul_pd( a.value, b.value );
1035 }
1036 #elif BLAZE_AVX_MODE
1037 {
1038  return _mm256_mul_pd( a.value, b.value );
1039 }
1040 #elif BLAZE_SSE2_MODE
1041 {
1042  return _mm_mul_pd( a.value, b.value );
1043 }
1044 #else
1045 = delete;
1046 #endif
1047 //*************************************************************************************************
1048 
1049 
1050 //*************************************************************************************************
1060 BLAZE_ALWAYS_INLINE const SIMDcdouble
1061  operator*( const SIMDdouble& a, const SIMDcdouble& b ) noexcept
1062 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
1063 {
1064  return _mm512_mul_pd( a.value, b.value );
1065 }
1066 #elif BLAZE_AVX_MODE
1067 {
1068  return _mm256_mul_pd( a.value, b.value );
1069 }
1070 #elif BLAZE_SSE2_MODE
1071 {
1072  return _mm_mul_pd( a.value, b.value );
1073 }
1074 #else
1075 = delete;
1076 #endif
1077 //*************************************************************************************************
1078 
1079 
1080 //*************************************************************************************************
1090 BLAZE_ALWAYS_INLINE const SIMDcdouble
1091  operator*( const SIMDcdouble& a, const SIMDcdouble& b ) noexcept
1092 #if BLAZE_AVX512F_MODE
1093 {
1094  const __m512d a_ii = _mm512_permute_pd( a.value, 0b1'1'1'1'1'1'1'1 );
1095  const __m512d b_ri = _mm512_permute_pd( b.value, 0b0'1'0'1'0'1'0'1 );
1096  const __m512d a_rr = _mm512_permute_pd( a.value, 0 );
1097  return _mm512_fmaddsub_pd( a_rr, b.value, _mm512_mul_pd( a_ii, b_ri ) );
1098 }
1099 #elif BLAZE_AVX_MODE
1100 {
1101  __m256d x, y, z;
1102 
1103  x = _mm256_shuffle_pd( a.value, a.value, 0 );
1104  z = _mm256_mul_pd( x, b.value );
1105  x = _mm256_shuffle_pd( a.value, a.value, 15 );
1106  y = _mm256_shuffle_pd( b.value, b.value, 5 );
1107  y = _mm256_mul_pd( x, y );
1108  return _mm256_addsub_pd( z, y );
1109 }
1110 #elif BLAZE_SSE3_MODE
1111 {
1112  __m128d x, y, z;
1113 
1114  x = _mm_shuffle_pd( a.value, a.value, 0 );
1115  z = _mm_mul_pd( x, b.value );
1116  x = _mm_shuffle_pd( a.value, a.value, 3 );
1117  y = _mm_shuffle_pd( b.value, b.value, 1 );
1118  y = _mm_mul_pd( x, y );
1119  return _mm_addsub_pd( z, y );
1120 }
1121 #else
1122 = delete;
1123 #endif
1124 //*************************************************************************************************
1125 
1126 } // namespace blaze
1127 
1128 #endif
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Expression object for 64-bit floating point multiplication operations.The SIMDf64MultExpr class repre...
Definition: Mult.h:949
SIMD type for 64-bit double precision floating point data values.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
const T2 b_
The right-hand side operand for the multiplication.
Definition: Mult.h:994
decltype(auto) operator*(const DenseMatrix< MT1, false > &lhs, const DenseMatrix< MT2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( ).
Definition: DMatDMatMultExpr.h:8908
Header file for the basic SIMD types.
BLAZE_ALWAYS_INLINE SIMDf64MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf64MultExpr class.
Definition: Mult.h:963
System settings for the SSE mode.
SIMDf64< This > BaseType
Base type of this SIMDf64MultExpr instance.
Definition: Mult.h:954
System settings for the inline keywords.
const T1 a_
The left-hand side operand for the multiplication.
Definition: Mult.h:993