Blaze  3.6
SMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
52 #include <blaze/math/Exception.h>
64 #include <blaze/math/shims/Reset.h>
66 #include <blaze/math/SIMD.h>
#include <blaze/math/traits/DeclDiagTrait.h>
#include <blaze/math/traits/DeclHermTrait.h>
#include <blaze/math/traits/DeclLowTrait.h>
#include <blaze/math/traits/DeclSymTrait.h>
#include <blaze/math/traits/DeclUppTrait.h>
#include <blaze/math/traits/MultTrait.h>
92 #include <blaze/math/views/Check.h>
97 #include <blaze/util/Assert.h>
98 #include <blaze/util/DisableIf.h>
99 #include <blaze/util/EnableIf.h>
102 #include <blaze/util/MaybeUnused.h>
103 #include <blaze/util/mpl/If.h>
104 #include <blaze/util/Types.h>
106 
107 
108 namespace blaze {
109 
110 //=================================================================================================
111 //
112 // CLASS SMATDMATMULTEXPR
113 //
114 //=================================================================================================
115 
116 //*************************************************************************************************
123 template< typename MT1 // Type of the left-hand side sparse matrix
124  , typename MT2 // Type of the right-hand side dense matrix
125  , bool SF // Symmetry flag
126  , bool HF // Hermitian flag
127  , bool LF // Lower flag
128  , bool UF > // Upper flag
129 class SMatDMatMultExpr
130  : public MatMatMultExpr< DenseMatrix< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
131  , private Computation
132 {
133  private:
134  //**Type definitions****************************************************************************
141  //**********************************************************************************************
142 
143  //**********************************************************************************************
145  static constexpr bool evaluateLeft = ( IsComputation_v<MT1> || RequiresEvaluation_v<MT1> );
146  //**********************************************************************************************
147 
148  //**********************************************************************************************
150  static constexpr bool evaluateRight = ( IsComputation_v<MT2> || RequiresEvaluation_v<MT2> );
151  //**********************************************************************************************
152 
153  //**********************************************************************************************
154  static constexpr bool SYM = ( SF && !( HF || LF || UF ) );
155  static constexpr bool HERM = ( HF && !( LF || UF ) );
156  static constexpr bool LOW = ( LF || ( ( SF || HF ) && UF ) );
157  static constexpr bool UPP = ( UF || ( ( SF || HF ) && LF ) );
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
166  template< typename T1, typename T2, typename T3 >
167  static constexpr bool IsEvaluationRequired_v = ( evaluateLeft || evaluateRight );
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
176  template< typename T1, typename T2, typename T3 >
177  static constexpr bool UseVectorizedKernel_v =
178  ( useOptimizedKernels &&
179  !IsDiagonal_v<T3> &&
180  T1::simdEnabled && T3::simdEnabled &&
181  IsRowMajorMatrix_v<T1> &&
182  IsSIMDCombinable_v< ElementType_t<T1>
184  , ElementType_t<T3> > &&
185  HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
186  HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
188  //**********************************************************************************************
189 
190  //**********************************************************************************************
192 
196  template< typename T1, typename T2, typename T3 >
197  static constexpr bool UseOptimizedKernel_v =
198  ( useOptimizedKernels &&
199  !UseVectorizedKernel_v<T1,T2,T3> &&
200  !IsDiagonal_v<T3> &&
201  !IsResizable_v< ElementType_t<T1> > &&
202  !IsResizable_v<ET1> );
204  //**********************************************************************************************
205 
206  //**********************************************************************************************
208 
211  template< typename T1, typename T2, typename T3 >
212  static constexpr bool UseDefaultKernel_v =
213  ( !UseVectorizedKernel_v<T1,T2,T3> &&
214  !UseOptimizedKernel_v<T1,T2,T3> );
216  //**********************************************************************************************
217 
218  //**********************************************************************************************
220 
223  using ForwardFunctor = If_t< HERM
224  , DeclHerm
225  , If_t< SYM
226  , DeclSym
227  , If_t< LOW
228  , If_t< UPP
229  , DeclDiag
230  , DeclLow >
231  , If_t< UPP
232  , DeclUpp
233  , Noop > > > >;
235  //**********************************************************************************************
236 
237  public:
238  //**Type definitions****************************************************************************
241 
244 
246  using ResultType = typename If_t< HERM
248  , If_t< SYM
250  , If_t< LOW
251  , If_t< UPP
254  , If_t< UPP
256  , MultTrait<RT1,RT2> > > > >::Type;
257 
// Type returned by the element access functions: a const value of ElementType.
262  using ReturnType = const ElementType;
// Type used when this expression is embedded in another expression: a const ResultType value.
263  using CompositeType = const ResultType;
264 
// Storage type of the left-hand side operand: held by value if MT1 is an expression,
// otherwise held by const reference.
266  using LeftOperand = If_t< IsExpression_v<MT1>, const MT1, const MT1& >;
267 
// Storage type of the right-hand side operand: held by value if MT2 is an expression,
// otherwise held by const reference.
269  using RightOperand = If_t< IsExpression_v<MT2>, const MT2, const MT2& >;
270 
273 
276  //**********************************************************************************************
277 
278  //**Compilation flags***************************************************************************
280  static constexpr bool simdEnabled =
281  ( !IsDiagonal_v<MT2> &&
282  MT2::simdEnabled &&
283  HasSIMDAdd_v<ET1,ET2> &&
284  HasSIMDMult_v<ET1,ET2> );
285 
287  static constexpr bool smpAssignable =
288  ( !evaluateLeft && MT1::smpAssignable && !evaluateRight && MT2::smpAssignable );
289  //**********************************************************************************************
290 
291  //**SIMD properties*****************************************************************************
// SIMD width used by the vectorized kernels, taken from SIMDTrait<ElementType>::size.
// The kernels use it as loop stride and as mask (size_t(-SIMDSIZE)) for index rounding.
293  static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
294  //**********************************************************************************************
295 
296  //**Constructor*********************************************************************************
302  explicit inline SMatDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
303  : lhs_( lhs ) // Left-hand side sparse matrix of the multiplication expression
304  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
305  {
306  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
307  }
308  //**********************************************************************************************
309 
310  //**Access operator*****************************************************************************
317  inline ReturnType operator()( size_t i, size_t j ) const {
318  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
319  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
320 
321  if( IsDiagonal_v<MT1> ) {
322  return lhs_(i,i) * rhs_(i,j);
323  }
324  else if( IsDiagonal_v<MT2> ) {
325  return lhs_(i,j) * rhs_(j,j);
326  }
327  else if( IsTriangular_v<MT1> || IsTriangular_v<MT2> ) {
328  const size_t begin( ( IsUpper_v<MT1> )
329  ?( ( IsLower_v<MT2> )
330  ?( max( ( IsStrictlyUpper_v<MT1> ? i+1UL : i )
331  , ( IsStrictlyLower_v<MT2> ? j+1UL : j ) ) )
332  :( IsStrictlyUpper_v<MT1> ? i+1UL : i ) )
333  :( ( IsLower_v<MT2> )
334  ?( IsStrictlyLower_v<MT2> ? j+1UL : j )
335  :( 0UL ) ) );
336  const size_t end( ( IsLower_v<MT1> )
337  ?( ( IsUpper_v<MT2> )
338  ?( min( ( IsStrictlyLower_v<MT1> ? i : i+1UL )
339  , ( IsStrictlyUpper_v<MT2> ? j : j+1UL ) ) )
340  :( IsStrictlyLower_v<MT1> ? i : i+1UL ) )
341  :( ( IsUpper_v<MT2> )
342  ?( IsStrictlyUpper_v<MT2> ? j : j+1UL )
343  :( lhs_.columns() ) ) );
344 
345  if( begin >= end ) return ElementType();
346 
347  const size_t n( end - begin );
348 
349  return subvector( row( lhs_, i, unchecked ), begin, n, unchecked ) *
350  subvector( column( rhs_, j, unchecked ), begin, n, unchecked );
351  }
352  else {
353  return row( lhs_, i, unchecked ) * column( rhs_, j, unchecked );
354  }
355  }
356  //**********************************************************************************************
357 
358  //**At function*********************************************************************************
366  inline ReturnType at( size_t i, size_t j ) const {
367  if( i >= lhs_.rows() ) {
368  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
369  }
370  if( j >= rhs_.columns() ) {
371  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
372  }
373  return (*this)(i,j);
374  }
375  //**********************************************************************************************
376 
377  //**Rows function*******************************************************************************
382  inline size_t rows() const noexcept {
383  return lhs_.rows();
384  }
385  //**********************************************************************************************
386 
387  //**Columns function****************************************************************************
392  inline size_t columns() const noexcept {
393  return rhs_.columns();
394  }
395  //**********************************************************************************************
396 
397  //**Left operand access*************************************************************************
402  inline LeftOperand leftOperand() const noexcept {
403  return lhs_;
404  }
405  //**********************************************************************************************
406 
407  //**Right operand access************************************************************************
412  inline RightOperand rightOperand() const noexcept {
413  return rhs_;
414  }
415  //**********************************************************************************************
416 
417  //**********************************************************************************************
423  template< typename T >
424  inline bool canAlias( const T* alias ) const noexcept {
425  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
426  }
427  //**********************************************************************************************
428 
429  //**********************************************************************************************
435  template< typename T >
436  inline bool isAliased( const T* alias ) const noexcept {
437  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
438  }
439  //**********************************************************************************************
440 
441  //**********************************************************************************************
446  inline bool isAligned() const noexcept {
447  return rhs_.isAligned();
448  }
449  //**********************************************************************************************
450 
451  //**********************************************************************************************
456  inline bool canSMPAssign() const noexcept {
457  return ( rows() * columns() >= SMP_SMATDMATMULT_THRESHOLD ) && !IsDiagonal_v<MT2>;
458  }
459  //**********************************************************************************************
460 
461  private:
462  //**Member variables****************************************************************************
465  //**********************************************************************************************
466 
467  //**Assignment to dense matrices****************************************************************
480  template< typename MT // Type of the target dense matrix
481  , bool SO > // Storage order of the target dense matrix
482  friend inline void assign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
483  {
485 
486  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
487  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
488 
489  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
490  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
491 
492  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
493  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
494  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
495  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
496  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
497  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
498 
499  SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
500  }
502  //**********************************************************************************************
503 
504  //**Default assignment to dense matrices********************************************************
518  template< typename MT3 // Type of the left-hand side target matrix
519  , typename MT4 // Type of the left-hand side matrix operand
520  , typename MT5 > // Type of the right-hand side matrix operand
521  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
523  {
524  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
525 
526  reset( C );
527 
528  for( size_t jj=0UL; jj<B.columns(); jj+=block )
529  {
530  const size_t jtmp( min( jj+block, B.columns() ) );
531 
532  for( size_t i=0UL; i<A.rows(); ++i )
533  {
534  auto element( A.begin(i) );
535  const auto end( A.end(i) );
536 
537  for( ; element!=end; ++element )
538  {
539  const size_t i1( element->index() );
540 
541  if( IsDiagonal_v<MT5> )
542  {
543  C(i,i1) = element->value() * B(i1,i1);
544  }
545  else
546  {
547  const size_t jbegin( ( IsUpper_v<MT5> )
548  ?( ( UPP )
549  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
550  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
551  :( jj ) );
552  const size_t jend( ( IsLower_v<MT5> )
553  ?( ( SYM || HERM || LOW )
554  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
555  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
556  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
557 
558  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
559  continue;
560 
561  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
562 
563  for( size_t j=jbegin; j<jend; ++j ) {
564  if( isDefault( C(i,j) ) )
565  C(i,j) = element->value() * B(i1,j);
566  else
567  C(i,j) += element->value() * B(i1,j);
568  }
569  }
570  }
571  }
572  }
573 
574  if( SYM || HERM ) {
575  for( size_t i=0UL; i<A.rows(); ++i ) {
576  for( size_t j=i+1UL; j<B.columns(); ++j ) {
577  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
578  }
579  }
580  }
581  }
583  //**********************************************************************************************
584 
585  //**Optimized assignment to dense matrices******************************************************
// Optimized assignment kernel for C = A * B (A sparse, B dense), enabled if
// UseOptimizedKernel_v<MT3,MT4,MT5> is set.
//   C: target matrix, reset before the accumulation starts
//   A: left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B: right-hand side operand, accessed element-wise
// The non-zero elements of each row of A are consumed in groups of four (unrolled), the
// leftover elements one at a time. The column loops are unrolled four-fold as well.
599  template< typename MT3 // Type of the left-hand side target matrix
600  , typename MT4 // Type of the left-hand side matrix operand
601  , typename MT5 > // Type of the right-hand side matrix operand
602  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
603  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
604  {
  // Row-major targets use one chunk spanning all columns; otherwise chunks of 64 columns.
605  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
606 
607  reset( C );
608 
609  for( size_t jj=0UL; jj<B.columns(); jj+=block )
610  {
  // End of the current column chunk (clipped to the number of columns)
611  const size_t jtmp( min( jj+block, B.columns() ) );
612 
613  for( size_t i=0UL; i<A.rows(); ++i )
614  {
615  const auto end( A.end(i) );
616  auto element( A.begin(i) );
617 
  // kpos: number of non-zeros in row i rounded down to a multiple of 4
618  const size_t nonzeros( A.nonZeros(i) );
619  const size_t kpos( nonzeros & size_t(-4) );
620  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
621 
  // Main loop: four consecutive non-zero elements of row i per iteration
622  for( size_t k=0UL; k<kpos; k+=4UL )
623  {
624  const size_t i1( element->index() );
625  const ET1 v1( element->value() );
626  ++element;
627  const size_t i2( element->index() );
628  const ET1 v2( element->value() );
629  ++element;
630  const size_t i3( element->index() );
631  const ET1 v3( element->value() );
632  ++element;
633  const size_t i4( element->index() );
634  const ET1 v4( element->value() );
635  ++element;
636 
637  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
638 
  // Column range [jbegin,jend) for this group: clipped to the chunk [jj,jtmp),
  // bounded by the smallest index i1 for upper B and the largest index i4 for lower B,
  // and by row index i for UPP (j >= i) or SYM/HERM/LOW (j <= i) results.
639  const size_t jbegin( ( IsUpper_v<MT5> )
640  ?( ( UPP )
641  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
642  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
643  :( UPP ? max(i,jj) : jj ) );
644  const size_t jend( ( IsLower_v<MT5> )
645  ?( ( SYM || HERM || LOW )
646  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
647  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
648  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
649 
  // The range can only be empty if some structural restriction is active
650  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
651  continue;
652 
653  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
654 
  // jpos: end of the four-fold unrolled part of the column range
655  const size_t jnum( jend - jbegin );
656  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
657  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
658 
659  for( size_t j=jbegin; j<jpos; j+=4UL ) {
660  C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
661  C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
662  C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
663  C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
664  }
  // Remaining (fewer than four) columns
665  for( size_t j=jpos; j<jend; ++j ) {
666  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
667  }
668  }
669 
  // Leftover non-zero elements of row i (fewer than four), handled one at a time
670  for( ; element!=end; ++element )
671  {
672  const size_t i1( element->index() );
673  const ET1 v1( element->value() );
674 
675  const size_t jbegin( ( IsUpper_v<MT5> )
676  ?( ( UPP )
677  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
678  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
679  :( UPP ? max(i,jj) : jj ) );
680  const size_t jend( ( IsLower_v<MT5> )
681  ?( ( SYM || HERM || LOW )
682  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
683  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
684  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
685 
686  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
687  continue;
688 
689  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
690 
691  const size_t jnum( jend - jbegin );
692  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
693  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
694 
695  for( size_t j=jbegin; j<jpos; j+=4UL ) {
696  C(i,j ) += v1 * B(i1,j );
697  C(i,j+1UL) += v1 * B(i1,j+1UL);
698  C(i,j+2UL) += v1 * B(i1,j+2UL);
699  C(i,j+3UL) += v1 * B(i1,j+3UL);
700  }
701  for( size_t j=jpos; j<jend; ++j ) {
702  C(i,j) += v1 * B(i1,j);
703  }
704  }
705  }
706  }
707 
  // SYM/HERM results: only columns j <= i were computed above, so the strictly upper
  // part is filled from the lower part (conjugated for HERM).
708  if( SYM || HERM ) {
709  for( size_t i=0UL; i<A.rows(); ++i ) {
710  for( size_t j=i+1UL; j<B.columns(); ++j ) {
711  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
712  }
713  }
714  }
715  }
717  //**********************************************************************************************
718 
719  //**Vectorized assignment to dense matrices*****************************************************
// Vectorized assignment kernel for C = A * B (A sparse, B dense), enabled if
// UseVectorizedKernel_v<MT3,MT4,MT5> is set.
//   C: target matrix, reset first, then updated via load()/store() and element access
//   A: left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B: right-hand side operand, read via load() and element access
// Each non-zero value of A is broadcast into a SIMD value (set()) and multiplied with
// SIMD chunks of the corresponding row of B. Non-zeros are consumed in groups of four,
// the leftover elements one at a time.
733  template< typename MT3 // Type of the left-hand side target matrix
734  , typename MT4 // Type of the left-hand side matrix operand
735  , typename MT5 > // Type of the right-hand side matrix operand
736  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
737  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
738  {
  // A scalar tail loop is required unless both C and B are padded
739  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
740 
741  reset( C );
742 
743  for( size_t i=0UL; i<A.rows(); ++i )
744  {
745  const auto end( A.end(i) );
746  auto element( A.begin(i) );
747 
  // kpos: number of non-zeros in row i rounded down to a multiple of 4
748  const size_t nonzeros( A.nonZeros(i) );
749  const size_t kpos( nonzeros & size_t(-4) );
750  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
751 
  // Main loop: four consecutive non-zero elements of row i per iteration
752  for( size_t k=0UL; k<kpos; k+=4UL )
753  {
754  const size_t i1( element->index() );
755  const ET1 v1( element->value() );
756  ++element;
757  const size_t i2( element->index() );
758  const ET1 v2( element->value() );
759  ++element;
760  const size_t i3( element->index() );
761  const ET1 v3( element->value() );
762  ++element;
763  const size_t i4( element->index() );
764  const ET1 v4( element->value() );
765  ++element;
766 
767  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
768 
  // Broadcast the four scalar values into SIMD values
769  const SIMDType xmm1( set( v1 ) );
770  const SIMDType xmm2( set( v2 ) );
771  const SIMDType xmm3( set( v3 ) );
772  const SIMDType xmm4( set( v4 ) );
773 
  // jbegin: first column to update, masked with size_t(-SIMDSIZE), which rounds down to
  // a multiple of SIMDSIZE (assuming SIMDSIZE is a power of two). Bounded by i1 for
  // upper B and by row index i for UPP results.
774  const size_t jbegin( ( IsUpper_v<MT5> )
775  ?( ( IsStrictlyUpper_v<MT5> )
776  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
777  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
778  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
  // jend: one past the last column to update. Bounded by i4 for lower B and by
  // row index i for SYM/HERM/LOW results.
779  const size_t jend( ( IsLower_v<MT5> )
780  ?( ( IsStrictlyLower_v<MT5> )
781  ?( SYM || HERM || LOW ? min(i+1UL,i4) : i4 )
782  :( SYM || HERM || LOW ? min(i,i4)+1UL : i4+1UL ) )
783  :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
784  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
785 
  // jpos: end of the SIMD loop. With a remainder loop it is jend rounded down to a
  // multiple of SIMDSIZE, otherwise the SIMD loop runs up to jend.
786  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
787  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
788 
789  size_t j( jbegin );
790 
791  for( ; j<jpos; j+=SIMDSIZE ) {
792  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
793  }
  // Scalar tail for the columns not covered by the SIMD loop
794  for( ; remainder && j<jend; ++j ) {
795  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
796  }
797  }
798 
  // Leftover non-zero elements of row i (fewer than four), handled one at a time
799  for( ; element!=end; ++element )
800  {
801  const size_t i1( element->index() );
802  const ET1 v1( element->value() );
803 
804  const SIMDType xmm1( set( v1 ) );
805 
806  const size_t jbegin( ( IsUpper_v<MT5> )
807  ?( ( IsStrictlyUpper_v<MT5> )
808  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
809  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
810  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
811  const size_t jend( ( IsLower_v<MT5> )
812  ?( ( IsStrictlyLower_v<MT5> )
813  ?( SYM || HERM || LOW ? min(i+1UL,i1) : i1 )
814  :( SYM || HERM || LOW ? min(i,i1)+1UL : i1+1UL ) )
815  :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
816  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
817 
818  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
819  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
820 
821  size_t j( jbegin );
822 
823  for( ; j<jpos; j+=SIMDSIZE ) {
824  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
825  }
826  for( ; remainder && j<jend; ++j ) {
827  C(i,j) += v1 * B(i1,j);
828  }
829  }
830  }
831 
  // SYM/HERM results: only columns j <= i were computed above, so the strictly upper
  // part is filled from the lower part (conjugated for HERM).
832  if( SYM || HERM ) {
833  for( size_t i=0UL; i<A.rows(); ++i ) {
834  for( size_t j=i+1UL; j<B.columns(); ++j ) {
835  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
836  }
837  }
838  }
839  }
841  //**********************************************************************************************
842 
843  //**Assignment to sparse matrices***************************************************************
856  template< typename MT // Type of the target sparse matrix
857  , bool SO > // Storage order of the target sparse matrix
858  friend inline void assign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
859  {
861 
862  using TmpType = If_t< SO, OppositeType, ResultType >;
863 
870 
871  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
872  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
873 
874  const ForwardFunctor fwd;
875 
876  const TmpType tmp( serial( rhs ) );
877  assign( ~lhs, fwd( tmp ) );
878  }
880  //**********************************************************************************************
881 
882  //**Addition assignment to dense matrices*******************************************************
895  template< typename MT // Type of the target dense matrix
896  , bool SO > // Storage order of the target dense matrix
897  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
898  {
900 
901  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
902  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
903 
904  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
905  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
906 
907  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
908  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
909  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
910  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
911  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
912  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
913 
914  SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
915  }
917  //**********************************************************************************************
918 
919  //**Default addition assignment to dense matrices***********************************************
// Default addition assignment kernel for C += A * B (A sparse, B dense), enabled if
// UseDefaultKernel_v<MT3,MT4,MT5> is set.
//   C: target matrix, updated in place (not reset)
//   A: left-hand side operand, traversed row by row via begin(i)/end(i)
//   B: right-hand side operand, accessed element-wise
// For every non-zero element A(i,i1) the scaled row i1 of B is added to row i of C.
933  template< typename MT3 // Type of the left-hand side target matrix
934  , typename MT4 // Type of the left-hand side matrix operand
935  , typename MT5 > // Type of the right-hand side matrix operand
936  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
937  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
938  {
  // Row-major targets and diagonal B use one chunk spanning all columns; otherwise
  // the columns are processed in chunks of 64.
939  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
940 
941  for( size_t jj=0UL; jj<B.columns(); jj+=block )
942  {
  // End of the current column chunk (clipped to the number of columns)
943  const size_t jtmp( min( jj+block, B.columns() ) );
944 
945  for( size_t i=0UL; i<A.rows(); ++i )
946  {
947  const auto end( A.end(i) );
948  auto element( A.begin(i) );
949 
950  for( ; element!=end; ++element )
951  {
952  const size_t i1( element->index() );
953 
954  if( IsDiagonal_v<MT5> )
955  {
  // Diagonal B: A(i,i1) only contributes to C(i,i1)
956  C(i,i1) += element->value() * B(i1,i1);
957  }
958  else
959  {
  // Column range [jbegin,jend): clipped to the chunk [jj,jtmp), to the potentially
  // non-zero part of row i1 of B, and by row index i for UPP (j >= i, upper B only)
  // or LOW (j <= i) results. Unlike the plain assignment kernels, SYM/HERM do not
  // restrict the range here.
960  const size_t jbegin( ( IsUpper_v<MT5> )
961  ?( ( UPP )
962  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
963  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
964  :( jj ) );
965  const size_t jend( ( IsLower_v<MT5> )
966  ?( ( LOW )
967  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
968  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
969  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
970 
  // The range can only be empty if some structural restriction is active
971  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
972  continue;
973 
974  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
975 
  // jpos: end of the four-fold unrolled part of the column range
976  const size_t jnum( jend - jbegin );
977  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
978  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
979 
980  for( size_t j=jbegin; j<jpos; j+=4UL ) {
981  C(i,j ) += element->value() * B(i1,j );
982  C(i,j+1UL) += element->value() * B(i1,j+1UL);
983  C(i,j+2UL) += element->value() * B(i1,j+2UL);
984  C(i,j+3UL) += element->value() * B(i1,j+3UL);
985  }
  // Remaining (fewer than four) columns
986  for( size_t j=jpos; j<jend; ++j ) {
987  C(i,j) += element->value() * B(i1,j);
988  }
989  }
990  }
991  }
992  }
993  }
995  //**********************************************************************************************
996 
997  //**Optimized addition assignment to dense matrices*********************************************
// Optimized addition assignment kernel for C += A * B (A sparse, B dense), enabled if
// UseOptimizedKernel_v<MT3,MT4,MT5> is set.
//   C: target matrix, updated in place (not reset)
//   A: left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B: right-hand side operand, accessed element-wise
// The non-zero elements of each row of A are consumed in groups of four (unrolled), the
// leftover elements one at a time. The column loops are unrolled four-fold as well.
1011  template< typename MT3 // Type of the left-hand side target matrix
1012  , typename MT4 // Type of the left-hand side matrix operand
1013  , typename MT5 > // Type of the right-hand side matrix operand
1014  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1015  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1016  {
  // Row-major targets use one chunk spanning all columns; otherwise chunks of 64 columns.
1017  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1018 
1019  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1020  {
  // End of the current column chunk (clipped to the number of columns)
1021  const size_t jtmp( min( jj+block, B.columns() ) );
1022 
1023  for( size_t i=0UL; i<A.rows(); ++i )
1024  {
1025  const auto end( A.end(i) );
1026  auto element( A.begin(i) );
1027 
  // kpos: number of non-zeros in row i rounded down to a multiple of 4
1028  const size_t nonzeros( A.nonZeros(i) );
1029  const size_t kpos( nonzeros & size_t(-4) );
1030  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1031 
  // Main loop: four consecutive non-zero elements of row i per iteration
1032  for( size_t k=0UL; k<kpos; k+=4UL )
1033  {
1034  const size_t i1( element->index() );
1035  const ET1 v1( element->value() );
1036  ++element;
1037  const size_t i2( element->index() );
1038  const ET1 v2( element->value() );
1039  ++element;
1040  const size_t i3( element->index() );
1041  const ET1 v3( element->value() );
1042  ++element;
1043  const size_t i4( element->index() );
1044  const ET1 v4( element->value() );
1045  ++element;
1046 
1047  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1048 
  // Column range [jbegin,jend) for this group: clipped to the chunk [jj,jtmp),
  // bounded by the smallest index i1 for upper B and the largest index i4 for lower B,
  // and by row index i for UPP (j >= i) or LOW (j <= i) results.
1049  const size_t jbegin( ( IsUpper_v<MT5> )
1050  ?( ( UPP )
1051  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1052  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1053  :( UPP ? max(i,jj) : jj ) );
1054  const size_t jend( ( IsLower_v<MT5> )
1055  ?( ( LOW )
1056  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1057  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1058  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1059 
  // The range can only be empty if some structural restriction is active
1060  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1061  continue;
1062 
1063  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1064 
  // jpos: end of the four-fold unrolled part of the column range
1065  const size_t jnum( jend - jbegin );
1066  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1067  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1068 
1069  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1070  C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1071  C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1072  C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1073  C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1074  }
  // Remaining (fewer than four) columns
1075  for( size_t j=jpos; j<jend; ++j ) {
1076  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1077  }
1078  }
1079 
  // Leftover non-zero elements of row i (fewer than four), handled one at a time
1080  for( ; element!=end; ++element )
1081  {
1082  const size_t i1( element->index() );
1083  const ET1 v1( element->value() );
1084 
1085  const size_t jbegin( ( IsUpper_v<MT5> )
1086  ?( ( UPP )
1087  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1088  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1089  :( UPP ? max(i,jj) : jj ) );
1090  const size_t jend( ( IsLower_v<MT5> )
1091  ?( ( LOW )
1092  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1093  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1094  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1095 
1096  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1097  continue;
1098 
1099  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1100 
1101  const size_t jnum( jend - jbegin );
1102  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1103  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1104 
1105  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1106  C(i,j ) += v1 * B(i1,j );
1107  C(i,j+1UL) += v1 * B(i1,j+1UL);
1108  C(i,j+2UL) += v1 * B(i1,j+2UL);
1109  C(i,j+3UL) += v1 * B(i1,j+3UL);
1110  }
1111  for( size_t j=jpos; j<jend; ++j ) {
1112  C(i,j) += v1 * B(i1,j);
1113  }
1114  }
1115  }
1116  }
1117  }
1119  //**********************************************************************************************
1120 
1121  //**Vectorized addition assignment to dense matrices********************************************
/*!\brief Vectorized addition assignment kernel for a sparse matrix-dense matrix multiplication
//        (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side sparse matrix operand.
// \param B The right-hand side dense matrix operand.
// \return void
//
// This overload only participates if UseVectorizedKernel_v<MT3,MT4,MT5> is \a true. Every row
// \a i of \a A is traversed once: its non-zero elements are consumed in groups of four, followed
// by a one-at-a-time loop for the leftover elements. For each group the affected column range of
// row \a i of \a C is updated via SIMD loads/stores; a scalar loop handles the remaining columns
// if \a C or \a B is not padded.
// NOTE(review): \a UPP and \a LOW are compile time flags defined outside of this block; they
// presumably state that the result is declared upper/lower - confirm against the class.
*/
1135  template< typename MT3 // Type of the left-hand side target matrix
1136  , typename MT4 // Type of the left-hand side matrix operand
1137  , typename MT5 > // Type of the right-hand side matrix operand
1138  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1139  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1140  {
 // A scalar remainder loop is only required if at least one of C and B is not padded
1141  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1142 
1143  for( size_t i=0UL; i<A.rows(); ++i )
1144  {
1145  const auto end( A.end(i) );
1146  auto element( A.begin(i) );
1147 
 // kpos is the number of non-zero elements in row i rounded down to a multiple of four
1148  const size_t nonzeros( A.nonZeros(i) );
1149  const size_t kpos( nonzeros & size_t(-4) );
1150  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1151 
 // Main loop: four non-zero elements of row i are combined per iteration
1152  for( size_t k=0UL; k<kpos; k+=4UL )
1153  {
 // Read index and value of four consecutive elements; the iterator is advanced in between,
 // so the order of these statements must not be changed
1154  const size_t i1( element->index() );
1155  const ET1 v1( element->value() );
1156  ++element;
1157  const size_t i2( element->index() );
1158  const ET1 v2( element->value() );
1159  ++element;
1160  const size_t i3( element->index() );
1161  const ET1 v3( element->value() );
1162  ++element;
1163  const size_t i4( element->index() );
1164  const ET1 v4( element->value() );
1165  ++element;
1166 
1167  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1168 
 // SIMD operands created from the four scalar values via set()
1169  const SIMDType xmm1( set( v1 ) );
1170  const SIMDType xmm2( set( v2 ) );
1171  const SIMDType xmm3( set( v3 ) );
1172  const SIMDType xmm4( set( v4 ) );
1173 
 // First column to update, masked down to a multiple of SIMDSIZE. The start depends on the
 // smallest index i1 of the group if B is (strictly) upper and on the row index i if UPP is set
1174  const size_t jbegin( ( IsUpper_v<MT5> )
1175  ?( ( IsStrictlyUpper_v<MT5> )
1176  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1177  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1178  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
 // One past the last column to update. The end depends on the largest index i4 of the group
 // if B is (strictly) lower and on the row index i if LOW is set
1179  const size_t jend( ( IsLower_v<MT5> )
1180  ?( ( IsStrictlyLower_v<MT5> )
1181  ?( LOW ? min(i+1UL,i4) : i4 )
1182  :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1183  :( LOW ? i+1UL : B.columns() ) );
1184  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1185 
 // End of the SIMD loop: jend rounded down to a multiple of SIMDSIZE if a scalar remainder
 // loop is required, jend itself otherwise
1186  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1187  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1188 
1189  size_t j( jbegin );
1190 
 // SIMD part: SIMDSIZE columns of row i of C are updated per iteration
1191  for( ; j<jpos; j+=SIMDSIZE ) {
1192  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1193  }
 // Scalar part for the columns in the range [jpos,jend) (only if remainder is true)
1194  for( ; remainder && j<jend; ++j ) {
1195  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1196  }
1197  }
1198 
 // Tail loop: the elements of row i not covered by the groups of four are handled one by one,
 // with the same bounds computation based on the single index i1
1199  for( ; element!=end; ++element )
1200  {
1201  const size_t i1( element->index() );
1202  const ET1 v1( element->value() );
1203 
1204  const SIMDType xmm1( set( v1 ) );
1205 
1206  const size_t jbegin( ( IsUpper_v<MT5> )
1207  ?( ( IsStrictlyUpper_v<MT5> )
1208  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1209  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1210  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
1211  const size_t jend( ( IsLower_v<MT5> )
1212  ?( ( IsStrictlyLower_v<MT5> )
1213  ?( LOW ? min(i+1UL,i1) : i1 )
1214  :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1215  :( LOW ? i+1UL : B.columns() ) );
1216  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1217 
1218  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1219  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1220 
1221  size_t j( jbegin );
1222 
1223  for( ; j<jpos; j+=SIMDSIZE ) {
1224  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1225  }
1226  for( ; remainder && j<jend; ++j ) {
1227  C(i,j) += v1 * B(i1,j);
1228  }
1229  }
1230  }
1231  }
1233  //**********************************************************************************************
1234 
1235  //**Addition assignment to sparse matrices******************************************************
1236  // No special implementation for the addition assignment to sparse matrices.
1237  //**********************************************************************************************
1238 
1239  //**Subtraction assignment to dense matrices****************************************************
1252  template< typename MT // Type of the target dense matrix
1253  , bool SO > // Storage order of the target dense matrix
1254  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1255  {
1257 
1258  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1259  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1260 
1261  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
1262  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1263 
1264  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1265  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1266  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1267  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1268  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1269  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1270 
1271  SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1272  }
1274  //**********************************************************************************************
1275 
1276  //**Default subtraction assignment to dense matrices********************************************
1290  template< typename MT3 // Type of the left-hand side target matrix
1291  , typename MT4 // Type of the left-hand side matrix operand
1292  , typename MT5 > // Type of the right-hand side matrix operand
1293  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1294  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
1295  {
1296  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
1297 
1298  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1299  {
1300  const size_t jtmp( min( jj+block, B.columns() ) );
1301 
1302  for( size_t i=0UL; i<A.rows(); ++i )
1303  {
1304  const auto end( A.end(i) );
1305  auto element( A.begin(i) );
1306 
1307  for( ; element!=end; ++element )
1308  {
1309  const size_t i1( element->index() );
1310 
1311  if( IsDiagonal_v<MT5> )
1312  {
1313  C(i,i1) -= element->value() * B(i1,i1);
1314  }
1315  else
1316  {
1317  const size_t jbegin( ( IsUpper_v<MT5> )
1318  ?( ( UPP )
1319  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1320  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1321  :( jj ) );
1322  const size_t jend( ( IsLower_v<MT5> )
1323  ?( ( LOW )
1324  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1325  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1326  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1327 
1328  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1329  continue;
1330 
1331  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1332 
1333  const size_t jnum( jend - jbegin );
1334  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1335  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1336 
1337  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1338  C(i,j ) -= element->value() * B(i1,j );
1339  C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1340  C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1341  C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1342  }
1343  for( size_t j=jpos; j<jend; ++j ) {
1344  C(i,j) -= element->value() * B(i1,j);
1345  }
1346  }
1347  }
1348  }
1349  }
1350  }
1352  //**********************************************************************************************
1353 
1354  //**Optimized subtraction assignment to dense matrices******************************************
/*!\brief Optimized subtraction assignment kernel for a sparse matrix-dense matrix
//        multiplication (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side sparse matrix operand.
// \param B The right-hand side dense matrix operand.
// \return void
//
// This overload only participates if UseOptimizedKernel_v<MT3,MT4,MT5> is \a true. The columns
// are traversed in blocks of 64 unless the target is row-major, in which case a single block
// spans all columns. Within a block, the non-zero elements of each row \a i of \a A are consumed
// in groups of four, followed by a one-at-a-time loop for the leftover elements; the column loop
// is unrolled by four.
// NOTE(review): \a UPP and \a LOW are compile time flags defined outside of this block; they
// presumably state that the result is declared upper/lower - confirm against the class.
*/
1368  template< typename MT3 // Type of the left-hand side target matrix
1369  , typename MT4 // Type of the left-hand side matrix operand
1370  , typename MT5 > // Type of the right-hand side matrix operand
1371  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1372  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1373  {
1374  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1375 
1376  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1377  {
 // One past the last column of the current column block
1378  const size_t jtmp( min( jj+block, B.columns() ) );
1379 
1380  for( size_t i=0UL; i<A.rows(); ++i )
1381  {
1382  const auto end( A.end(i) );
1383  auto element( A.begin(i) );
1384 
 // kpos is the number of non-zero elements in row i rounded down to a multiple of four
1385  const size_t nonzeros( A.nonZeros(i) );
1386  const size_t kpos( nonzeros & size_t(-4) );
1387  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1388 
 // Main loop: four non-zero elements of row i are combined per iteration
1389  for( size_t k=0UL; k<kpos; k+=4UL )
1390  {
 // Read index and value of four consecutive elements; the iterator is advanced in between,
 // so the order of these statements must not be changed
1391  const size_t i1( element->index() );
1392  const ET1 v1( element->value() );
1393  ++element;
1394  const size_t i2( element->index() );
1395  const ET1 v2( element->value() );
1396  ++element;
1397  const size_t i3( element->index() );
1398  const ET1 v3( element->value() );
1399  ++element;
1400  const size_t i4( element->index() );
1401  const ET1 v4( element->value() );
1402  ++element;
1403 
1404  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1405 
 // First column to update within the current block; depends on the smallest index i1 of the
 // group if B is (strictly) upper and on the row index i if UPP is set
1406  const size_t jbegin( ( IsUpper_v<MT5> )
1407  ?( ( UPP )
1408  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1409  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1410  :( UPP ? max(i,jj) : jj ) );
 // One past the last column to update within the current block; depends on the largest
 // index i4 of the group if B is (strictly) lower and on the row index i if LOW is set
1411  const size_t jend( ( IsLower_v<MT5> )
1412  ?( ( LOW )
1413  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1414  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1415  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1416 
 // With restricted bounds the range may be empty or inverted for this block
1417  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1418  continue;
1419 
1420  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1421 
 // jpos marks the end of the part of the column range that is unrolled by four
1422  const size_t jnum( jend - jbegin );
1423  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1424  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1425 
1426  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1427  C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1428  C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1429  C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1430  C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1431  }
 // Columns in the range [jpos,jend) that do not fill a complete group of four
1432  for( size_t j=jpos; j<jend; ++j ) {
1433  C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1434  }
1435  }
1436 
 // Tail loop: the elements of row i not covered by the groups of four are handled one by one,
 // with the same bounds computation based on the single index i1
1437  for( ; element!=end; ++element )
1438  {
1439  const size_t i1( element->index() );
1440  const ET1 v1( element->value() );
1441 
1442  const size_t jbegin( ( IsUpper_v<MT5> )
1443  ?( ( UPP )
1444  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1445  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1446  :( UPP ? max(i,jj) : jj ) );
1447  const size_t jend( ( IsLower_v<MT5> )
1448  ?( ( LOW )
1449  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1450  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1451  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1452 
1453  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1454  continue;
1455 
1456  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1457 
1458  const size_t jnum( jend - jbegin );
1459  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1460  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1461 
1462  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1463  C(i,j ) -= v1 * B(i1,j );
1464  C(i,j+1UL) -= v1 * B(i1,j+1UL);
1465  C(i,j+2UL) -= v1 * B(i1,j+2UL);
1466  C(i,j+3UL) -= v1 * B(i1,j+3UL);
1467  }
1468  for( size_t j=jpos; j<jend; ++j ) {
1469  C(i,j) -= v1 * B(i1,j);
1470  }
1471  }
1472  }
1473  }
1474  }
1476  //**********************************************************************************************
1477 
1478  //**Vectorized subtraction assignment to dense matrices*****************************************
/*!\brief Vectorized subtraction assignment kernel for a sparse matrix-dense matrix
//        multiplication (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side sparse matrix operand.
// \param B The right-hand side dense matrix operand.
// \return void
//
// This overload only participates if UseVectorizedKernel_v<MT3,MT4,MT5> is \a true. Every row
// \a i of \a A is traversed once: its non-zero elements are consumed in groups of four, followed
// by a one-at-a-time loop for the leftover elements. For each group the affected column range of
// row \a i of \a C is updated via SIMD loads/stores; a scalar loop handles the remaining columns
// if \a C or \a B is not padded.
// NOTE(review): \a UPP and \a LOW are compile time flags defined outside of this block; they
// presumably state that the result is declared upper/lower - confirm against the class.
*/
1492  template< typename MT3 // Type of the left-hand side target matrix
1493  , typename MT4 // Type of the left-hand side matrix operand
1494  , typename MT5 > // Type of the right-hand side matrix operand
1495  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1496  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1497  {
 // A scalar remainder loop is only required if at least one of C and B is not padded
1498  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1499 
1500  for( size_t i=0UL; i<A.rows(); ++i )
1501  {
1502  const auto end( A.end(i) );
1503  auto element( A.begin(i) );
1504 
 // kpos is the number of non-zero elements in row i rounded down to a multiple of four
1505  const size_t nonzeros( A.nonZeros(i) );
1506  const size_t kpos( nonzeros & size_t(-4) );
1507  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1508 
 // Main loop: four non-zero elements of row i are combined per iteration
1509  for( size_t k=0UL; k<kpos; k+=4UL )
1510  {
 // Read index and value of four consecutive elements; the iterator is advanced in between,
 // so the order of these statements must not be changed
1511  const size_t i1( element->index() );
1512  const ET1 v1( element->value() );
1513  ++element;
1514  const size_t i2( element->index() );
1515  const ET1 v2( element->value() );
1516  ++element;
1517  const size_t i3( element->index() );
1518  const ET1 v3( element->value() );
1519  ++element;
1520  const size_t i4( element->index() );
1521  const ET1 v4( element->value() );
1522  ++element;
1523 
1524  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1525 
 // SIMD operands created from the four scalar values via set()
1526  const SIMDType xmm1( set( v1 ) );
1527  const SIMDType xmm2( set( v2 ) );
1528  const SIMDType xmm3( set( v3 ) );
1529  const SIMDType xmm4( set( v4 ) );
1530 
 // First column to update, masked down to a multiple of SIMDSIZE. The start depends on the
 // smallest index i1 of the group if B is (strictly) upper and on the row index i if UPP is set
1531  const size_t jbegin( ( IsUpper_v<MT5> )
1532  ?( ( IsStrictlyUpper_v<MT5> )
1533  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1534  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1535  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
 // One past the last column to update. The end depends on the largest index i4 of the group
 // if B is (strictly) lower and on the row index i if LOW is set
1536  const size_t jend( ( IsLower_v<MT5> )
1537  ?( ( IsStrictlyLower_v<MT5> )
1538  ?( LOW ? min(i+1UL,i4) : i4 )
1539  :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1540  :( LOW ? i+1UL : B.columns() ) );
1541  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1542 
 // End of the SIMD loop: jend rounded down to a multiple of SIMDSIZE if a scalar remainder
 // loop is required, jend itself otherwise
1543  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1544  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1545 
1546  size_t j( jbegin );
1547 
 // SIMD part: SIMDSIZE columns of row i of C are updated per iteration
1548  for( ; j<jpos; j+=SIMDSIZE ) {
1549  C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1550  }
 // Scalar part for the columns in the range [jpos,jend) (only if remainder is true)
1551  for( ; remainder && j<jend; ++j ) {
1552  C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1553  }
1554  }
1555 
 // Tail loop: the elements of row i not covered by the groups of four are handled one by one,
 // with the same bounds computation based on the single index i1
1556  for( ; element!=end; ++element )
1557  {
1558  const size_t i1( element->index() );
1559  const ET1 v1( element->value() );
1560 
1561  const SIMDType xmm1( set( v1 ) );
1562 
1563  const size_t jbegin( ( IsUpper_v<MT5> )
1564  ?( ( IsStrictlyUpper_v<MT5> )
1565  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1566  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1567  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
1568  const size_t jend( ( IsLower_v<MT5> )
1569  ?( ( IsStrictlyLower_v<MT5> )
1570  ?( LOW ? min(i+1UL,i1) : i1 )
1571  :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1572  :( LOW ? i+1UL : B.columns() ) );
1573  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1574 
1575  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1576  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1577 
1578  size_t j( jbegin );
1579 
1580  for( ; j<jpos; j+=SIMDSIZE ) {
1581  C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1582  }
1583  for( ; remainder && j<jend; ++j ) {
1584  C(i,j) -= v1 * B(i1,j);
1585  }
1586  }
1587  }
1588  }
1590  //**********************************************************************************************
1591 
1592  //**Subtraction assignment to sparse matrices***************************************************
1593  // No special implementation for the subtraction assignment to sparse matrices.
1594  //**********************************************************************************************
1595 
1596  //**Schur product assignment to dense matrices**************************************************
1609  template< typename MT // Type of the target dense matrix
1610  , bool SO > // Storage order of the target dense matrix
1611  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1612  {
1614 
1618 
1619  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1620  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1621 
1622  const ResultType tmp( serial( rhs ) );
1623  schurAssign( ~lhs, tmp );
1624  }
1626  //**********************************************************************************************
1627 
1628  //**Schur product assignment to sparse matrices*************************************************
1629  // No special implementation for the Schur product assignment to sparse matrices.
1630  //**********************************************************************************************
1631 
1632  //**Multiplication assignment to dense matrices*************************************************
1633  // No special implementation for the multiplication assignment to dense matrices.
1634  //**********************************************************************************************
1635 
1636  //**Multiplication assignment to sparse matrices************************************************
1637  // No special implementation for the multiplication assignment to sparse matrices.
1638  //**********************************************************************************************
1639 
1640  //**SMP assignment to dense matrices************************************************************
1655  template< typename MT // Type of the target dense matrix
1656  , bool SO > // Storage order of the target dense matrix
1657  friend inline auto smpAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1658  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1659  {
1661 
1662  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1663  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1664 
1665  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1666  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1667 
1668  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1669  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1670  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1671  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1672  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1673  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1674 
1675  smpAssign( ~lhs, A * B );
1676  }
1678  //**********************************************************************************************
1679 
1680  //**SMP assignment to sparse matrices***********************************************************
1695  template< typename MT // Type of the target sparse matrix
1696  , bool SO > // Storage order of the target sparse matrix
1697  friend inline auto smpAssign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1698  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1699  {
1701 
1702  using TmpType = If_t< SO, OppositeType, ResultType >;
1703 
1710 
1711  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1712  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1713 
1714  const ForwardFunctor fwd;
1715 
1716  const TmpType tmp( rhs );
1717  smpAssign( ~lhs, fwd( tmp ) );
1718  }
1720  //**********************************************************************************************
1721 
1722  //**SMP addition assignment to dense matrices***************************************************
1738  template< typename MT // Type of the target dense matrix
1739  , bool SO > // Storage order of the target dense matrix
1740  friend inline auto smpAddAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1741  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1742  {
1744 
1745  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1746  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1747 
1748  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1749  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1750 
1751  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1752  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1753  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1754  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1755  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1756  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1757 
1758  smpAddAssign( ~lhs, A * B );
1759  }
1761  //**********************************************************************************************
1762 
1763  //**SMP addition assignment to sparse matrices**************************************************
1764  // No special implementation for the SMP addition assignment to sparse matrices.
1765  //**********************************************************************************************
1766 
1767  //**SMP subtraction assignment to dense matrices************************************************
1783  template< typename MT // Type of the target dense matrix
1784  , bool SO > // Storage order of the target dense matrix
1785  friend inline auto smpSubAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1786  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1787  {
1789 
1790  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1791  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1792 
1793  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1794  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1795 
1796  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1797  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1798  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1799  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1800  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1801  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1802 
1803  smpSubAssign( ~lhs, A * B );
1804  }
1806  //**********************************************************************************************
1807 
1808  //**SMP subtraction assignment to sparse matrices***********************************************
1809  // No special implementation for the SMP subtraction assignment to sparse matrices.
1810  //**********************************************************************************************
1811 
1812  //**SMP Schur product assignment to dense matrices**********************************************
1825  template< typename MT // Type of the target dense matrix
1826  , bool SO > // Storage order of the target dense matrix
1827  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1828  {
1830 
1834 
1835  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1836  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1837 
1838  const ResultType tmp( rhs );
1839  smpSchurAssign( ~lhs, tmp );
1840  }
1842  //**********************************************************************************************
1843 
1844  //**SMP Schur product assignment to sparse matrices*********************************************
1845  // No special implementation for the SMP Schur product assignment to sparse matrices.
1846  //**********************************************************************************************
1847 
1848  //**SMP multiplication assignment to dense matrices*********************************************
1849  // No special implementation for the SMP multiplication assignment to dense matrices.
1850  //**********************************************************************************************
1851 
1852  //**SMP multiplication assignment to sparse matrices********************************************
1853  // No special implementation for the SMP multiplication assignment to sparse matrices.
1854  //**********************************************************************************************
1855 
1856  //**Compile time checks*************************************************************************
1865  //**********************************************************************************************
1866 };
1867 //*************************************************************************************************
1868 
1869 
1870 
1871 
1872 //=================================================================================================
1873 //
1874 // GLOBAL BINARY ARITHMETIC OPERATORS
1875 //
1876 //=================================================================================================
1877 
1878 //*************************************************************************************************
1891 template< typename MT1 // Type of the left-hand side dense matrix
1892  , typename MT2 // Type of the right-hand side sparse matrix
1893  , DisableIf_t< ( IsIdentity_v<MT1> &&
1894  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > ) ||
1895  IsZero_v<MT1> >* = nullptr >
1896 inline const SMatDMatMultExpr<MT1,MT2,false,false,false,false>
1897  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1898 {
1900 
1901  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1902 
1903  return SMatDMatMultExpr<MT1,MT2,false,false,false,false>( ~lhs, ~rhs );
1904 }
1906 //*************************************************************************************************
1907 
1908 
1909 //*************************************************************************************************
1923 template< typename MT1 // Type of the left-hand side sparse matrix
1924  , typename MT2 // Type of the right-hand side dense matrix
1925  , EnableIf_t< IsIdentity_v<MT1> &&
1926  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > >* = nullptr >
1927 inline const MT2&
1928  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1929 {
1931 
1932  MAYBE_UNUSED( lhs );
1933 
1934  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1935 
1936  return (~rhs);
1937 }
1939 //*************************************************************************************************
1940 
1941 
1942 //*************************************************************************************************
1955 template< typename MT1 // Type of the left-hand side dense matrix
1956  , typename MT2 // Type of the right-hand side sparse matrix
1957  , EnableIf_t< IsZero_v<MT1> >* = nullptr >
1958 inline decltype(auto)
1959  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1960 {
1962 
1963  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1964 
1965  using ReturnType = const MultTrait_t< ResultType_t<MT1>, ResultType_t<MT2> >;
1966 
1968  BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE( ReturnType );
1969 
1970  return ReturnType( (~lhs).rows(), (~rhs).columns() );
1971 }
1973 //*************************************************************************************************
1974 
1975 
1976 //*************************************************************************************************
2005 template< typename MT1 // Type of the left-hand side sparse matrix
2006  , typename MT2 > // Type of the right-hand side dense matrix
2007 inline decltype(auto)
2008  operator*( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
2009 {
2011 
2012  if( (~lhs).columns() != (~rhs).rows() ) {
2013  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
2014  }
2015 
2016  return smatdmatmult( ~lhs, ~rhs );
2017 }
2018 //*************************************************************************************************
2019 
2020 
2021 
2022 
2023 //=================================================================================================
2024 //
2025 // GLOBAL FUNCTIONS
2026 //
2027 //=================================================================================================
2028 
2029 //*************************************************************************************************
/*!\brief Declares the given sparse matrix/dense matrix multiplication expression \a dm as symmetric.
//
// \param dm The multiplication expression to be declared symmetric.
// \return A multiplication expression over the same operands with the symmetry flag enabled.
// \exception std::invalid_argument Invalid symmetric matrix specification.
//
// Non-square expressions are rejected. Otherwise the expression is rebuilt from the left and
// right operand of \a dm with \a SF forced to \c true, while \a HF, \a LF and \a UF are carried
// over unchanged.
*/
2053 template< typename MT1 // Type of the left-hand side sparse matrix
2054  , typename MT2 // Type of the right-hand side dense matrix
2055  , bool SF // Symmetry flag
2056  , bool HF // Hermitian flag
2057  , bool LF // Lower flag
2058  , bool UF > // Upper flag
2059 inline decltype(auto) declsym( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2060 {
2062 
   // Only square expressions are accepted for the symmetric declaration.
2063  if( !isSquare( dm ) ) {
2064  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
2065  }
2066 
   // Same operands and same HF/LF/UF flags, but with the symmetry flag switched on.
2067  using ReturnType = const SMatDMatMultExpr<MT1,MT2,true,HF,LF,UF>;
2068  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2069 }
2071 //*************************************************************************************************
2072 
2073 
2074 //*************************************************************************************************
/*!\brief Declares the given sparse matrix/dense matrix multiplication expression \a dm as Hermitian.
//
// \param dm The multiplication expression to be declared Hermitian.
// \return A multiplication expression over the same operands with the Hermitian flag enabled.
// \exception std::invalid_argument Invalid Hermitian matrix specification.
//
// Non-square expressions are rejected. Otherwise the expression is rebuilt from the left and
// right operand of \a dm with \a HF forced to \c true, while \a SF, \a LF and \a UF are carried
// over unchanged.
*/
2098 template< typename MT1 // Type of the left-hand side sparse matrix
2099  , typename MT2 // Type of the right-hand side dense matrix
2100  , bool SF // Symmetry flag
2101  , bool HF // Hermitian flag
2102  , bool LF // Lower flag
2103  , bool UF > // Upper flag
2104 inline decltype(auto) declherm( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2105 {
2107 
   // Only square expressions are accepted for the Hermitian declaration.
2108  if( !isSquare( dm ) ) {
2109  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
2110  }
2111 
   // Same operands and same SF/LF/UF flags, but with the Hermitian flag switched on.
2112  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,true,LF,UF>;
2113  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2114 }
2116 //*************************************************************************************************
2117 
2118 
2119 //*************************************************************************************************
/*!\brief Declares the given sparse matrix/dense matrix multiplication expression \a dm as lower.
//
// \param dm The multiplication expression to be declared lower.
// \return A multiplication expression over the same operands with the lower flag enabled.
// \exception std::invalid_argument Invalid lower matrix specification.
//
// Non-square expressions are rejected. Otherwise the expression is rebuilt from the left and
// right operand of \a dm with \a LF forced to \c true, while \a SF, \a HF and \a UF are carried
// over unchanged.
*/
2143 template< typename MT1 // Type of the left-hand side sparse matrix
2144  , typename MT2 // Type of the right-hand side dense matrix
2145  , bool SF // Symmetry flag
2146  , bool HF // Hermitian flag
2147  , bool LF // Lower flag
2148  , bool UF > // Upper flag
2149 inline decltype(auto) decllow( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2150 {
2152 
   // Only square expressions are accepted for the lower declaration.
2153  if( !isSquare( dm ) ) {
2154  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
2155  }
2156 
   // Same operands and same SF/HF/UF flags, but with the lower flag switched on.
2157  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,UF>;
2158  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2159 }
2161 //*************************************************************************************************
2162 
2163 
2164 //*************************************************************************************************
/*!\brief Declares the given sparse matrix/dense matrix multiplication expression \a dm as upper.
//
// \param dm The multiplication expression to be declared upper.
// \return A multiplication expression over the same operands with the upper flag enabled.
// \exception std::invalid_argument Invalid upper matrix specification.
//
// Non-square expressions are rejected. Otherwise the expression is rebuilt from the left and
// right operand of \a dm with \a UF forced to \c true, while \a SF, \a HF and \a LF are carried
// over unchanged.
*/
2188 template< typename MT1 // Type of the left-hand side sparse matrix
2189  , typename MT2 // Type of the right-hand side dense matrix
2190  , bool SF // Symmetry flag
2191  , bool HF // Hermitian flag
2192  , bool LF // Lower flag
2193  , bool UF > // Upper flag
2194 inline decltype(auto) declupp( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2195 {
2197 
   // Only square expressions are accepted for the upper declaration.
2198  if( !isSquare( dm ) ) {
2199  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
2200  }
2201 
   // Same operands and same SF/HF/LF flags, but with the upper flag switched on.
2202  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,true>;
2203  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2204 }
2206 //*************************************************************************************************
2207 
2208 
2209 //*************************************************************************************************
/*!\brief Declares the given sparse matrix/dense matrix multiplication expression \a dm as diagonal.
//
// \param dm The multiplication expression to be declared diagonal.
// \return A multiplication expression over the same operands with lower and upper flag enabled.
// \exception std::invalid_argument Invalid diagonal matrix specification.
//
// Non-square expressions are rejected. Otherwise the expression is rebuilt from the left and
// right operand of \a dm with both \a LF and \a UF forced to \c true, while \a SF and \a HF are
// carried over unchanged.
*/
2233 template< typename MT1 // Type of the left-hand side sparse matrix
2234  , typename MT2 // Type of the right-hand side dense matrix
2235  , bool SF // Symmetry flag
2236  , bool HF // Hermitian flag
2237  , bool LF // Lower flag
2238  , bool UF > // Upper flag
2239 inline decltype(auto) decldiag( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2240 {
2242 
   // Only square expressions are accepted for the diagonal declaration.
2243  if( !isSquare( dm ) ) {
2244  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
2245  }
2246 
   // Diagonal is expressed as lower and upper at the same time; SF and HF are kept as given.
2247  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,true>;
2248  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2249 }
2251 //*************************************************************************************************
2252 
2253 
2254 
2255 
2256 //=================================================================================================
2257 //
2258 // SIZE SPECIALIZATIONS
2259 //
2260 //=================================================================================================
2261 
2262 //*************************************************************************************************
// Size trait of the multiplication expression for index 0UL: inherited unchanged from the
// Size trait of the left-hand side operand type MT1 for the same index.
// NOTE(review): index 0UL presumably refers to the number of rows -- confirm against Size.
2264 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2265 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2266  : public Size<MT1,0UL>
2267 {};
2268 
// Size trait of the multiplication expression for index 1UL: inherited unchanged from the
// Size trait of the right-hand side operand type MT2 for the same index.
// NOTE(review): index 1UL presumably refers to the number of columns -- confirm against Size.
2269 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2270 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2271  : public Size<MT2,1UL>
2272 {};
2274 //*************************************************************************************************
2275 
2276 
2277 
2278 
2279 //=================================================================================================
2280 //
2281 // ISALIGNED SPECIALIZATIONS
2282 //
2283 //=================================================================================================
2284 
2285 //*************************************************************************************************
// IsAligned trait of the multiplication expression: inherited unchanged from the IsAligned
// trait of the right-hand side dense operand type MT2; the left-hand side sparse operand
// type MT1 does not take part in the result.
2287 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2288 struct IsAligned< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2289  : public IsAligned<MT2>
2290 {};
2292 //*************************************************************************************************
2293 
2294 } // namespace blaze
2295 
2296 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:456
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Header file for the decldiag trait.
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:975
Header file for basic type definitions.
typename If< Condition, T1, T2 >::Type If_t
Auxiliary alias template for the If class template. The If_t alias template provides a convenient shor...
Definition: If.h:109
Header file for the declherm trait.
static constexpr bool HERM
Flag for Hermitian matrices.
Definition: SMatDMatMultExpr.h:155
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions. The ResultType_t alias declaration provides ...
Definition: Aliases.h:390
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Base template for the DeclUppTrait class.
Definition: DeclUppTrait.h:134
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type,...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:372
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:302
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:595
constexpr Unchecked unchecked
Global Unchecked instance. The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Constraint on the data type.
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: SMatDMatMultExpr.h:261
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_t alias declaration provid...
Definition: SIMDTrait.h:315
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:154
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:412
Header file for the MAYBE_UNUSED function template.
Header file for the IsIdentity type trait.
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1001
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for sparse matrix-dense matrix multiplications. The SMatDMatMultExpr class represent...
Definition: Forward.h:121
Header file for the reset shim.
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: SMatDMatMultExpr.h:293
If_t< IsExpression_v< MT2 >, const MT2, const MT2 & > RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:269
Constraints on the storage order of matrix types.
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:259
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
OppositeType_t< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:258
static constexpr bool UPP
Flag for upper matrices.
Definition: SMatDMatMultExpr.h:157
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes....
Definition: DenseMatrix.h:81
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes....
Definition: Forward.h:145
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions. The ElementType_t alias declaration provide...
Definition: Aliases.h:170
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:260
ElementType_t< RT1 > ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:137
Constraint on the data type.
Constraint on the data type.
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template. The EnableIf_t alias declaration provides a convenient...
Definition: EnableIf.h:138
LeftOperand leftOperand() const noexcept
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:402
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:424
Header file for the generic max algorithm.
Header file for the DisableIf class template.
typename If_t< HERM, DeclHermTrait< MultTrait_t< RT1, RT2 > >, If_t< SYM, DeclSymTrait< MultTrait_t< RT1, RT2 > >, If_t< LOW, If_t< UPP, DeclDiagTrait< MultTrait_t< RT1, RT2 > >, DeclLowTrait< MultTrait_t< RT1, RT2 > > >, If_t< UPP, DeclUppTrait< MultTrait_t< RT1, RT2 > >, MultTrait< RT1, RT2 > > > > >::Type ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:256
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type. In case the given data type T is not a zero vector or matrix type,...
Definition: Zero.h:61
Generic wrapper for the decllow() function.
Definition: DeclLow.h:59
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
CompositeType_t< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:140
Header file for the decllow trait.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:366
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1001
Header file for the IsLower type trait.
constexpr void MAYBE_UNUSED(const Args &...)
Suppression of unused parameter warnings.
Definition: MaybeUnused.h:81
ResultType_t< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:136
Header file for the IsAligned type trait.
Generic wrapper for the null function.
Definition: Noop.h:60
Header file for the IsTriangular type trait.
Base template for the DeclSymTrait class.
Definition: DeclSymTrait.h:134
Constraints on the storage order of matrix types.
Header file for the exception macros of the math module.
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1198
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:438
Header file for the DeclDiag functor.
Constraint on the data type.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:317
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the IsPadded type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:103
typename T::OppositeType OppositeType_t
Alias declaration for nested OppositeType type definitions. The OppositeType_t alias declaration provi...
Definition: Aliases.h:270
static constexpr bool evaluateRight
Compilation switch for the composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:150
Header file for the conjugate shim.
Header file for the declupp trait.
ResultType_t< MT1 > RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:135
Header file for the IsSIMDCombinable type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:392
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions. The TransposeType_t alias declaration pro...
Definition: Aliases.h:470
Header file for run time assertion macros.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:382
Base template for the DeclHermTrait class.
Definition: DeclHermTrait.h:134
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions. The CompositeType_t alias declaration pro...
Definition: Aliases.h:90
Base template for the MultTrait class.
Definition: MultTrait.h:146
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:463
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:263
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
Header file for the IsZero type trait.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:295
Header file for the declsym trait.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for all forward declarations for expression class templates.
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1002
BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v< T > &&HasSize_v< T, 1UL >, If_t< IsSigned_v< T >, SIMDint8, SIMDuint8 > > set(T value) noexcept
Sets all values in the vector to the given 1-byte integral value.
Definition: Set.h:75
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:436
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:59
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:808
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type. In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
static constexpr bool SYM
Flag for symmetric matrices.
Definition: SMatDMatMultExpr.h:154
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:59
If_t< IsExpression_v< MT1 >, const MT1, const MT1 & > LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:266
If_t< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:275
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:446
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: SMatDMatMultExpr.h:287
Base template for the DeclLowTrait class.
Definition: DeclLowTrait.h:134
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1002
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
static constexpr bool evaluateLeft
Compilation switch for the composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:145
static constexpr bool LOW
Flag for lower matrices.
Definition: SMatDMatMultExpr.h:156
CompositeType_t< MT1 > CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:139
Header file for the IntegralConstant class template.
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:59
If_t< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:272
Header file for the DeclHerm functor.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:262
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:635
Header file for the IsUpper type trait.
typename DisableIf< Condition, T >::Type DisableIf_t
Auxiliary type for the DisableIf class template. The DisableIf_t alias declaration provides a convenie...
Definition: DisableIf.h:138
ElementType_t< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:138
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1324
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:59
Base template for the DeclDiagTrait class.
Definition: DeclDiagTrait.h:134
bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:951
Header file for the IsResizable type trait.
Header file for the Size type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type. In case the given data type T is a zero vector or matrix type,...
Definition: Zero.h:81
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression,...
Definition: Assert.h:101
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a sparse, N-dimensional matrix type,...
Definition: SparseMatrix.h:61
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: SMatDMatMultExpr.h:280
Header file for the IsExpression type trait class.
Header file for the function trace functionality.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:464