Multiplication.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_MULTIPLICATION_H_
36 #define _BLAZE_MATH_SIMD_MULTIPLICATION_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
44 #include <blaze/system/Inline.h>
46 
47 
48 namespace blaze {
49 
50 //=================================================================================================
51 //
52 // 16-BIT INTEGRAL SIMD TYPES
53 //
54 //=================================================================================================
55 
56 //*************************************************************************************************
66 template< typename T > // Type of both operands
67 BLAZE_ALWAYS_INLINE const T
68  operator*( const SIMDi16<T>& a, const SIMDi16<T>& b ) noexcept
69 #if BLAZE_AVX2_MODE
70 {
71  return _mm256_mullo_epi16( (~a).value, (~b).value );
72 }
73 #elif BLAZE_SSE2_MODE
74 {
75  return _mm_mullo_epi16( (~a).value, (~b).value );
76 }
77 #else
78 = delete;
79 #endif
80 //*************************************************************************************************
81 
82 
83 //*************************************************************************************************
93 template< typename T1 // Type of the left-hand side operand
94  , typename T2 > // Type of the right-hand side operand
95 BLAZE_ALWAYS_INLINE const SIMDuint16
96  operator*( const SIMDi16<T1>& a, const SIMDi16<T2>& b ) noexcept
97 #if BLAZE_AVX2_MODE
98 {
99  return _mm256_mullo_epi16( (~a).value, (~b).value );
100 }
101 #elif BLAZE_SSE2_MODE
102 {
103  return _mm_mullo_epi16( (~a).value, (~b).value );
104 }
105 #else
106 = delete;
107 #endif
108 //*************************************************************************************************
109 
110 
111 //*************************************************************************************************
121 BLAZE_ALWAYS_INLINE const SIMDcint16
122  operator*( const SIMDcint16& a, const SIMDint16& b ) noexcept
123 #if BLAZE_AVX2_MODE
124 {
125  return _mm256_mullo_epi16( (~a).value, (~b).value );
126 }
127 #elif BLAZE_SSE2_MODE
128 {
129  return _mm_mullo_epi16( (~a).value, (~b).value );
130 }
131 #else
132 = delete;
133 #endif
134 //*************************************************************************************************
135 
136 
137 //*************************************************************************************************
147 BLAZE_ALWAYS_INLINE const SIMDcuint16
148  operator*( const SIMDcuint16& a, const SIMDuint16& b ) noexcept
149 #if BLAZE_AVX2_MODE
150 {
151  return _mm256_mullo_epi16( (~a).value, (~b).value );
152 }
153 #elif BLAZE_SSE2_MODE
154 {
155  return _mm_mullo_epi16( (~a).value, (~b).value );
156 }
157 #else
158 = delete;
159 #endif
160 //*************************************************************************************************
161 
162 
163 //*************************************************************************************************
173 BLAZE_ALWAYS_INLINE const SIMDcint16
174  operator*( const SIMDint16& a, const SIMDcint16& b ) noexcept
175 #if BLAZE_AVX2_MODE
176 {
177  return _mm256_mullo_epi16( (~a).value, (~b).value );
178 }
179 #elif BLAZE_SSE2_MODE
180 {
181  return _mm_mullo_epi16( (~a).value, (~b).value );
182 }
183 #else
184 = delete;
185 #endif
186 //*************************************************************************************************
187 
188 
189 //*************************************************************************************************
199 BLAZE_ALWAYS_INLINE const SIMDcuint16
200  operator*( const SIMDuint16& a, const SIMDcuint16& b ) noexcept
201 #if BLAZE_AVX2_MODE
202 {
203  return _mm256_mullo_epi16( (~a).value, (~b).value );
204 }
205 #elif BLAZE_SSE2_MODE
206 {
207  return _mm_mullo_epi16( (~a).value, (~b).value );
208 }
209 #else
210 = delete;
211 #endif
212 //*************************************************************************************************
213 
214 
215 //*************************************************************************************************
225 template< typename T > // Type of both operands
226 BLAZE_ALWAYS_INLINE const T
227  operator*( const SIMDci16<T>& a, const SIMDci16<T>& b ) noexcept
228 #if BLAZE_AVX2_MODE
229 {
230  __m256i x, y, z;
231  const __m256i neg( _mm256_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 ) );
232 
233  x = _mm256_shufflelo_epi16( (~a).value, 0xA0 );
234  x = _mm256_shufflehi_epi16( x, 0xA0 );
235  z = _mm256_mullo_epi16( x, (~b).value );
236  x = _mm256_shufflelo_epi16( (~a).value, 0xF5 );
237  x = _mm256_shufflehi_epi16( x, 0xF5 );
238  y = _mm256_shufflelo_epi16( (~b).value, 0xB1 );
239  y = _mm256_shufflehi_epi16( y, 0xB1 );
240  y = _mm256_mullo_epi16( x, y );
241  y = _mm256_mullo_epi16( y, neg );
242  return _mm256_add_epi16( z, y );
243 }
244 #elif BLAZE_SSE2_MODE
245 {
246  __m128i x, y, z;
247  const __m128i neg( _mm_set_epi16( 1, -1, 1, -1, 1, -1, 1, -1 ) );
248 
249  x = _mm_shufflelo_epi16( (~a).value, 0xA0 );
250  x = _mm_shufflehi_epi16( x, 0xA0 );
251  z = _mm_mullo_epi16( x, (~b).value );
252  x = _mm_shufflelo_epi16( (~a).value, 0xF5 );
253  x = _mm_shufflehi_epi16( x, 0xF5 );
254  y = _mm_shufflelo_epi16( (~b).value, 0xB1 );
255  y = _mm_shufflehi_epi16( y, 0xB1 );
256  y = _mm_mullo_epi16( x, y );
257  y = _mm_mullo_epi16( y, neg );
258  return _mm_add_epi16( z, y );
259 }
260 #else
261 = delete;
262 #endif
263 //*************************************************************************************************
264 
265 
266 
267 
268 //=================================================================================================
269 //
270 // 32-BIT INTEGRAL SIMD TYPES
271 //
272 //=================================================================================================
273 
274 //*************************************************************************************************
284 template< typename T > // Type of both operands
285 BLAZE_ALWAYS_INLINE const T
286  operator*( const SIMDi32<T>& a, const SIMDi32<T>& b ) noexcept
287 #if BLAZE_MIC_MODE
288 {
289  return _mm512_mullo_epi32( (~a).value, (~b).value );
290 }
291 #elif BLAZE_AVX2_MODE
292 {
293  return _mm256_mullo_epi32( (~a).value, (~b).value );
294 }
295 #elif BLAZE_SSE4_MODE
296 {
297  return _mm_mullo_epi32( (~a).value, (~b).value );
298 }
299 #else
300 = delete;
301 #endif
302 //*************************************************************************************************
303 
304 
305 //*************************************************************************************************
315 template< typename T1 // Type of the left-hand side operand
316  , typename T2 > // Type of the right-hand side operand
317 BLAZE_ALWAYS_INLINE const SIMDuint32
318  operator*( const SIMDi32<T1>& a, const SIMDi32<T2>& b ) noexcept
319 #if BLAZE_MIC_MODE
320 {
321  return _mm512_mullo_epi32( (~a).value, (~b).value );
322 }
323 #elif BLAZE_AVX2_MODE
324 {
325  return _mm256_mullo_epi32( (~a).value, (~b).value );
326 }
327 #elif BLAZE_SSE4_MODE
328 {
329  return _mm_mullo_epi32( (~a).value, (~b).value );
330 }
331 #else
332 = delete;
333 #endif
334 //*************************************************************************************************
335 
336 
337 //*************************************************************************************************
347 BLAZE_ALWAYS_INLINE const SIMDcint32
348  operator*( const SIMDcint32& a, const SIMDint32& b ) noexcept
349 #if BLAZE_MIC_MODE
350 {
351  return _mm512_mullo_epi32( (~a).value, (~b).value );
352 }
353 #elif BLAZE_AVX2_MODE
354 {
355  return _mm256_mullo_epi32( (~a).value, (~b).value );
356 }
357 #elif BLAZE_SSE4_MODE
358 {
359  return _mm_mullo_epi32( (~a).value, (~b).value );
360 }
361 #else
362 = delete;
363 #endif
364 //*************************************************************************************************
365 
366 
367 //*************************************************************************************************
377 BLAZE_ALWAYS_INLINE const SIMDcuint32
378  operator*( const SIMDcuint32& a, const SIMDuint32& b ) noexcept
379 #if BLAZE_MIC_MODE
380 {
381  return _mm512_mullo_epi32( (~a).value, (~b).value );
382 }
383 #elif BLAZE_AVX2_MODE
384 {
385  return _mm256_mullo_epi32( (~a).value, (~b).value );
386 }
387 #elif BLAZE_SSE4_MODE
388 {
389  return _mm_mullo_epi32( (~a).value, (~b).value );
390 }
391 #else
392 = delete;
393 #endif
394 //*************************************************************************************************
395 
396 
397 //*************************************************************************************************
407 template< typename T1 // Type of the left-hand side operand
408  , typename T2 > // Type of the right-hand side operand
409 BLAZE_ALWAYS_INLINE const SIMDcint32
410  operator*( const SIMDint32& a, const SIMDcint32& b ) noexcept
411 #if BLAZE_MIC_MODE
412 {
413  return _mm512_mullo_epi32( (~a).value, (~b).value );
414 }
415 #elif BLAZE_AVX2_MODE
416 {
417  return _mm256_mullo_epi32( (~a).value, (~b).value );
418 }
419 #elif BLAZE_SSE4_MODE
420 {
421  return _mm_mullo_epi32( (~a).value, (~b).value );
422 }
423 #else
424 = delete;
425 #endif
426 //*************************************************************************************************
427 
428 
429 //*************************************************************************************************
439 template< typename T1 // Type of the left-hand side operand
440  , typename T2 > // Type of the right-hand side operand
441 BLAZE_ALWAYS_INLINE const SIMDcuint32
442  operator*( const SIMDuint32& a, const SIMDcuint32& b ) noexcept
443 #if BLAZE_MIC_MODE
444 {
445  return _mm512_mullo_epi32( (~a).value, (~b).value );
446 }
447 #elif BLAZE_AVX2_MODE
448 {
449  return _mm256_mullo_epi32( (~a).value, (~b).value );
450 }
451 #elif BLAZE_SSE4_MODE
452 {
453  return _mm_mullo_epi32( (~a).value, (~b).value );
454 }
455 #else
456 = delete;
457 #endif
458 //*************************************************************************************************
459 
460 
461 //*************************************************************************************************
471 template< typename T > // Type of both operands
472 BLAZE_ALWAYS_INLINE const T
473  operator*( const SIMDci32<T>& a, const SIMDci32<T>& b ) noexcept
474 #if BLAZE_AVX2_MODE
475 {
476  __m256i x, y, z;
477  const __m256i neg( _mm256_set_epi32( 1, -1, 1, -1, 1, -1, 1, -1 ) );
478 
479  x = _mm256_shuffle_epi32( (~a).value, 0xA0 );
480  z = _mm256_mullo_epi32( x, (~b).value );
481  x = _mm256_shuffle_epi32( (~a).value, 0xF5 );
482  y = _mm256_shuffle_epi32( (~b).value, 0xB1 );
483  y = _mm256_mullo_epi32( x, y );
484  y = _mm256_mullo_epi32( y, neg );
485  return _mm256_add_epi32( z, y );
486 }
487 #elif BLAZE_SSE4_MODE
488 {
489  __m128i x, y, z;
490  const __m128i neg( _mm_set_epi32( 1, -1, 1, -1 ) );
491 
492  x = _mm_shuffle_epi32( (~a).value, 0xA0 );
493  z = _mm_mullo_epi32( x, (~b).value );
494  x = _mm_shuffle_epi32( (~a).value, 0xF5 );
495  y = _mm_shuffle_epi32( (~b).value, 0xB1 );
496  y = _mm_mullo_epi32( x, y );
497  y = _mm_mullo_epi32( y, neg );
498  return _mm_add_epi32( z, y );
499 }
500 #else
501 = delete;
502 #endif
503 //*************************************************************************************************
504 
505 
506 
507 
508 //=================================================================================================
509 //
510 // 32-BIT FLOATING POINT SIMD TYPES
511 //
512 //=================================================================================================
513 
514 //*************************************************************************************************
521 template< typename T1 // Type of the left-hand side operand
522  , typename T2 > // Type of the right-hand side operand
523 struct SIMDf32MultExpr : public SIMDf32< SIMDf32MultExpr<T1,T2> >
524 {
525  //**Type definitions****************************************************************************
527  using BaseType = SIMDf32<This>;
528  //**********************************************************************************************
529 
530  //**Constructor*********************************************************************************
536  explicit BLAZE_ALWAYS_INLINE SIMDf32MultExpr( const T1& a, const T2& b )
537  : a_( a ) // The left-hand side operand for the multiplication
538  , b_( b ) // The right-hand side operand for the multiplication
539  {}
540  //**********************************************************************************************
541 
542  //**Evaluation function*************************************************************************
547  BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept
548 #if BLAZE_MIC_MODE
549  {
550  return _mm512_mul_ps( a_.eval().value, b_.eval().value );
551  }
552 #elif BLAZE_AVX_MODE
553  {
554  return _mm256_mul_ps( a_.eval().value, b_.eval().value );
555  }
556 #elif BLAZE_SSE_MODE
557  {
558  return _mm_mul_ps( a_.eval().value, b_.eval().value );
559  }
560 #else
561  = delete;
562 #endif
563  //**********************************************************************************************
564 
565  //**Member variables****************************************************************************
566  const T1 a_;
567  const T2 b_;
568  //**********************************************************************************************
569 };
570 //*************************************************************************************************
571 
572 
573 //*************************************************************************************************
583 template< typename T1 // Type of the left-hand side operand
584  , typename T2 > // Type of the right-hand side operand
586  operator*( const SIMDf32<T1>& a, const SIMDf32<T2>& b ) noexcept
587 {
588  return SIMDf32MultExpr<T1,T2>( ~a, ~b );
589 }
590 //*************************************************************************************************
591 
592 
593 //*************************************************************************************************
603 BLAZE_ALWAYS_INLINE const SIMDcfloat
604  operator*( const SIMDcfloat& a, const SIMDfloat& b ) noexcept
605 #if BLAZE_MIC_MODE
606 {
607  return _mm512_mul_ps( a.value, b.value );
608 }
609 #elif BLAZE_AVX_MODE
610 {
611  return _mm256_mul_ps( a.value, b.value );
612 }
613 #elif BLAZE_SSE_MODE
614 {
615  return _mm_mul_ps( a.value, b.value );
616 }
617 #else
618 = delete;
619 #endif
620 //*************************************************************************************************
621 
622 
623 //*************************************************************************************************
633 BLAZE_ALWAYS_INLINE const SIMDcfloat
634  operator*( const SIMDfloat& a, const SIMDcfloat& b ) noexcept
635 #if BLAZE_MIC_MODE
636 {
637  return _mm512_mul_ps( a.value, b.value );
638 }
639 #elif BLAZE_AVX_MODE
640 {
641  return _mm256_mul_ps( a.value, b.value );
642 }
643 #elif BLAZE_SSE_MODE
644 {
645  return _mm_mul_ps( a.value, b.value );
646 }
647 #else
648 = delete;
649 #endif
650 //*************************************************************************************************
651 
652 
653 //*************************************************************************************************
663 BLAZE_ALWAYS_INLINE const SIMDcfloat
664  operator*( const SIMDcfloat& a, const SIMDcfloat& b ) noexcept
665 #if BLAZE_AVX_MODE
666 {
667  __m256 x, y, z;
668 
669  x = _mm256_shuffle_ps( a.value, a.value, 0xA0 );
670  z = _mm256_mul_ps( x, b.value );
671  x = _mm256_shuffle_ps( a.value, a.value, 0xF5 );
672  y = _mm256_shuffle_ps( b.value, b.value, 0xB1 );
673  y = _mm256_mul_ps( x, y );
674  return _mm256_addsub_ps( z, y );
675 }
676 #elif BLAZE_SSE3_MODE
677 {
678  __m128 x, y, z;
679 
680  x = _mm_shuffle_ps( a.value, a.value, 0xA0 );
681  z = _mm_mul_ps( x, b.value );
682  x = _mm_shuffle_ps( a.value, a.value, 0xF5 );
683  y = _mm_shuffle_ps( b.value, b.value, 0xB1 );
684  y = _mm_mul_ps( x, y );
685  return _mm_addsub_ps( z, y );
686 }
687 #else
688 = delete;
689 #endif
690 //*************************************************************************************************
691 
692 
693 
694 
695 //=================================================================================================
696 //
697 // 64-BIT FLOATING POINT SIMD TYPES
698 //
699 //=================================================================================================
700 
701 //*************************************************************************************************
708 template< typename T1 // Type of the left-hand side operand
709  , typename T2 > // Type of the right-hand side operand
710 struct SIMDf64MultExpr : public SIMDf64< SIMDf64MultExpr<T1,T2> >
711 {
712  //**Type definitions****************************************************************************
714  using BaseType = SIMDf64<This>;
715  //**********************************************************************************************
716 
717  //**Constructor*********************************************************************************
723  explicit BLAZE_ALWAYS_INLINE SIMDf64MultExpr( const T1& a, const T2& b )
724  : a_( a ) // The left-hand side operand for the multiplication
725  , b_( b ) // The right-hand side operand for the multiplication
726  {}
727  //**********************************************************************************************
728 
729  //**Evaluation function*************************************************************************
734  BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept
735 #if BLAZE_MIC_MODE
736  {
737  return _mm512_mul_pd( a_.eval().value, b_.eval().value );
738  }
739 #elif BLAZE_AVX_MODE
740  {
741  return _mm256_mul_pd( a_.eval().value, b_.eval().value );
742  }
743 #elif BLAZE_SSE2_MODE
744  {
745  return _mm_mul_pd( a_.eval().value, b_.eval().value );
746  }
747 #else
748  = delete;
749 #endif
750  //**********************************************************************************************
751 
752  //**Member variables****************************************************************************
753  const T1 a_;
754  const T2 b_;
755  //**********************************************************************************************
756 };
757 //*************************************************************************************************
758 
759 
760 //*************************************************************************************************
770 template< typename T1 // Type of the left-hand side operand
771  , typename T2 > // Type of the right-hand side operand
773  operator*( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept
774 {
775  return SIMDf64MultExpr<T1,T2>( ~a, ~b );
776 }
777 //*************************************************************************************************
778 
779 
780 //*************************************************************************************************
790 BLAZE_ALWAYS_INLINE const SIMDcdouble
791  operator*( const SIMDcdouble& a, const SIMDdouble& b ) noexcept
792 #if BLAZE_MIC_MODE
793 {
794  return _mm512_mul_pd( a.value, b.value );
795 }
796 #elif BLAZE_AVX_MODE
797 {
798  return _mm256_mul_pd( a.value, b.value );
799 }
800 #elif BLAZE_SSE2_MODE
801 {
802  return _mm_mul_pd( a.value, b.value );
803 }
804 #else
805 = delete;
806 #endif
807 //*************************************************************************************************
808 
809 
810 //*************************************************************************************************
820 BLAZE_ALWAYS_INLINE const SIMDcdouble
821  operator*( const SIMDdouble& a, const SIMDcdouble& b ) noexcept
822 #if BLAZE_MIC_MODE
823 {
824  return _mm512_mul_pd( a.value, b.value );
825 }
826 #elif BLAZE_AVX_MODE
827 {
828  return _mm256_mul_pd( a.value, b.value );
829 }
830 #elif BLAZE_SSE2_MODE
831 {
832  return _mm_mul_pd( a.value, b.value );
833 }
834 #else
835 = delete;
836 #endif
837 //*************************************************************************************************
838 
839 
840 //*************************************************************************************************
850 BLAZE_ALWAYS_INLINE const SIMDcdouble
851  operator*( const SIMDcdouble& a, const SIMDcdouble& b ) noexcept
852 #if BLAZE_AVX_MODE
853 {
854  __m256d x, y, z;
855 
856  x = _mm256_shuffle_pd( a.value, a.value, 0 );
857  z = _mm256_mul_pd( x, b.value );
858  x = _mm256_shuffle_pd( a.value, a.value, 15 );
859  y = _mm256_shuffle_pd( b.value, b.value, 5 );
860  y = _mm256_mul_pd( x, y );
861  return _mm256_addsub_pd( z, y );
862 }
863 #elif BLAZE_SSE3_MODE
864 {
865  __m128d x, y, z;
866 
867  x = _mm_shuffle_pd( a.value, a.value, 0 );
868  z = _mm_mul_pd( x, b.value );
869  x = _mm_shuffle_pd( a.value, a.value, 3 );
870  y = _mm_shuffle_pd( b.value, b.value, 1 );
871  y = _mm_mul_pd( x, y );
872  return _mm_addsub_pd( z, y );
873 }
874 #else
875 = delete;
876 #endif
877 //*************************************************************************************************
878 
879 } // namespace blaze
880 
881 #endif
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:7800
Expression object for 64-bit floating point multiplication operations. The SIMDf64MultExpr class repre...
Definition: Multiplication.h:710
SIMD type for 64-bit double precision floating point data values.
SIMDf32< This > BaseType
Base type of this SIMDf32MultExpr instance.
Definition: Multiplication.h:527
BLAZE_ALWAYS_INLINE const SIMDfloat eval() const noexcept=delete
Evaluation of the expression object.
BLAZE_ALWAYS_INLINE const SIMDdouble eval() const noexcept=delete
Evaluation of the expression object.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
#define BLAZE_ALWAYS_INLINE
Platform dependent setup of an enforced inline keyword.
Definition: Inline.h:85
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:754
Header file for the basic SIMD types.
BLAZE_ALWAYS_INLINE SIMDf32MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf32MultExpr class.
Definition: Multiplication.h:536
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:566
SIMD type for 32-bit single precision floating point data values.
BLAZE_ALWAYS_INLINE SIMDf64MultExpr(const T1 &a, const T2 &b)
Constructor for the SIMDf64MultExpr class.
Definition: Multiplication.h:723
System settings for the SSE mode.
Expression object for 32-bit floating point multiplication operations. The SIMDf32MultExpr class repre...
Definition: Multiplication.h:523
const T2 b_
The right-hand side operand for the multiplication.
Definition: Multiplication.h:567
SIMDf64< This > BaseType
Base type of this SIMDf64MultExpr instance.
Definition: Multiplication.h:714
System settings for the inline keywords.
const T1 a_
The left-hand side operand for the multiplication.
Definition: Multiplication.h:753