SMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
52 #include <blaze/math/Exception.h>
64 #include <blaze/math/shims/Reset.h>
66 #include <blaze/math/SIMD.h>
92 #include <blaze/math/views/Check.h>
97 #include <blaze/util/Assert.h>
98 #include <blaze/util/DisableIf.h>
99 #include <blaze/util/EnableIf.h>
102 #include <blaze/util/mpl/If.h>
103 #include <blaze/util/Types.h>
105 
106 
107 namespace blaze {
108 
109 //=================================================================================================
110 //
111 // CLASS SMATDMATMULTEXPR
112 //
113 //=================================================================================================
114 
115 //*************************************************************************************************
122 template< typename MT1 // Type of the left-hand side sparse matrix
123  , typename MT2 // Type of the right-hand side dense matrix
124  , bool SF // Symmetry flag
125  , bool HF // Hermitian flag
126  , bool LF // Lower flag
127  , bool UF > // Upper flag
128 class SMatDMatMultExpr
129  : public MatMatMultExpr< DenseMatrix< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
130  , private Computation
131 {
132  private:
133  //**Type definitions****************************************************************************
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144  static constexpr bool evaluateLeft = ( IsComputation_v<MT1> || RequiresEvaluation_v<MT1> );
145  //**********************************************************************************************
146 
147  //**********************************************************************************************
149  static constexpr bool evaluateRight = ( IsComputation_v<MT2> || RequiresEvaluation_v<MT2> );
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
153  static constexpr bool SYM = ( SF && !( HF || LF || UF ) );
154  static constexpr bool HERM = ( HF && !( LF || UF ) );
155  static constexpr bool LOW = ( LF || ( ( SF || HF ) && UF ) );
156  static constexpr bool UPP = ( UF || ( ( SF || HF ) && LF ) );
157  //**********************************************************************************************
158 
159  //**********************************************************************************************
161 
165  template< typename T1, typename T2, typename T3 >
166  static constexpr bool IsEvaluationRequired_v = ( evaluateLeft || evaluateRight );
168  //**********************************************************************************************
169 
170  //**********************************************************************************************
172 
175  template< typename T1, typename T2, typename T3 >
176  static constexpr bool UseVectorizedKernel_v =
177  ( useOptimizedKernels &&
178  !IsDiagonal_v<T3> &&
179  T1::simdEnabled && T3::simdEnabled &&
180  IsRowMajorMatrix_v<T1> &&
181  IsSIMDCombinable_v< ElementType_t<T1>
183  , ElementType_t<T3> > &&
184  HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
185  HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
187  //**********************************************************************************************
188 
189  //**********************************************************************************************
191 
195  template< typename T1, typename T2, typename T3 >
196  static constexpr bool UseOptimizedKernel_v =
197  ( useOptimizedKernels &&
198  !UseVectorizedKernel_v<T1,T2,T3> &&
199  !IsDiagonal_v<T3> &&
200  !IsResizable_v< ElementType_t<T1> > &&
201  !IsResizable_v<ET1> );
203  //**********************************************************************************************
204 
205  //**********************************************************************************************
207 
210  template< typename T1, typename T2, typename T3 >
211  static constexpr bool UseDefaultKernel_v =
212  ( !UseVectorizedKernel_v<T1,T2,T3> &&
213  !UseOptimizedKernel_v<T1,T2,T3> );
215  //**********************************************************************************************
216 
217  //**********************************************************************************************
219 
222  using ForwardFunctor = If_t< HERM
223  , DeclHerm
224  , If_t< SYM
225  , DeclSym
226  , If_t< LOW
227  , If_t< UPP
228  , DeclDiag
229  , DeclLow >
230  , If_t< UPP
231  , DeclUpp
232  , Noop > > > >;
234  //**********************************************************************************************
235 
236  public:
237  //**Type definitions****************************************************************************
240 
243 
// Result type of the multiplication expression, selected from the HERM / SYM /
// LOW / UPP flags with the same nesting as ForwardFunctor; the final fallback
// is MultTrait<RT1,RT2>, and the nested ::Type of the selection is used.
// NOTE(review): this alias is incomplete as shown. The embedded listing numbers
// jump 245->247->249, 250->253 and 253->255, and the template brackets do not
// balance, so the "true" alternative of every If_t appears to have been lost
// in extraction. Restore the missing lines from the upstream header before
// compiling; they cannot be reconstructed from this excerpt alone.
245  using ResultType = typename If_t< HERM
247  , If_t< SYM
249  , If_t< LOW
250  , If_t< UPP
253  , If_t< UPP
255  , MultTrait<RT1,RT2> > > > >::Type;
256 
261  using ReturnType = const ElementType;
262  using CompositeType = const ResultType;
263 
265  using LeftOperand = If_t< IsExpression_v<MT1>, const MT1, const MT1& >;
266 
268  using RightOperand = If_t< IsExpression_v<MT2>, const MT2, const MT2& >;
269 
272 
275  //**********************************************************************************************
276 
277  //**Compilation flags***************************************************************************
279  static constexpr bool simdEnabled =
280  ( !IsDiagonal_v<MT2> &&
281  MT2::simdEnabled &&
282  HasSIMDAdd_v<ET1,ET2> &&
283  HasSIMDMult_v<ET1,ET2> );
284 
// Compile-time flag related to SMP assignment of this expression.
// NOTE(review): the initializer is missing. The embedded listing numbers jump
// from 286 to 288, so the line holding the right-hand side of this definition
// was lost in extraction. Restore it from the upstream header; it cannot be
// reconstructed from this excerpt.
286  static constexpr bool smpAssignable =
288  //**********************************************************************************************
289 
290  //**SIMD properties*****************************************************************************
292  static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
293  //**********************************************************************************************
294 
295  //**Constructor*********************************************************************************
301  explicit inline SMatDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
302  : lhs_( lhs ) // Left-hand side sparse matrix of the multiplication expression
303  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
304  {
305  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
306  }
307  //**********************************************************************************************
308 
309  //**Access operator*****************************************************************************
// Element access: computes element (i,j) of the product lhs_ * rhs_.
//
// i is the row index and must be below lhs_.rows(); j is the column index and
// must be below rhs_.columns(). Both are checked only by internal assertions
// (see at() for the checked variant).
//
// The evaluation strategy is chosen from the structure of the operand types:
//  - MT1 diagonal: only lhs_(i,i) can contribute, result is lhs_(i,i)*rhs_(i,j).
//  - MT2 diagonal: only rhs_(j,j) can contribute, result is lhs_(i,j)*rhs_(j,j).
//  - either operand triangular: the inner product is limited to [begin,end).
//  - otherwise: full inner product of row i of lhs_ with column j of rhs_.
316  inline ReturnType operator()( size_t i, size_t j ) const {
317  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
318  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
319 
320  if( IsDiagonal_v<MT1> ) {
321  return lhs_(i,i) * rhs_(i,j);
322  }
323  else if( IsDiagonal_v<MT2> ) {
324  return lhs_(i,j) * rhs_(j,j);
325  }
326  else if( IsTriangular_v<MT1> || IsTriangular_v<MT2> ) {
// First inner index that can contribute: an upper lhs_ starts row i at column i
// (i+1 if strictly upper); a lower rhs_ starts column j at row j (j+1 if
// strictly lower). With both restrictions the larger bound applies.
327  const size_t begin( ( IsUpper_v<MT1> )
328  ?( ( IsLower_v<MT2> )
329  ?( max( ( IsStrictlyUpper_v<MT1> ? i+1UL : i )
330  , ( IsStrictlyLower_v<MT2> ? j+1UL : j ) ) )
331  :( IsStrictlyUpper_v<MT1> ? i+1UL : i ) )
332  :( ( IsLower_v<MT2> )
333  ?( IsStrictlyLower_v<MT2> ? j+1UL : j )
334  :( 0UL ) ) );
// One past the last inner index that can contribute: a lower lhs_ ends row i at
// column i (exclusive i if strictly lower); an upper rhs_ ends column j at row
// j (exclusive j if strictly upper). With both restrictions the smaller bound
// applies; without any, the full inner dimension lhs_.columns() is used.
335  const size_t end( ( IsLower_v<MT1> )
336  ?( ( IsUpper_v<MT2> )
337  ?( min( ( IsStrictlyLower_v<MT1> ? i : i+1UL )
338  , ( IsStrictlyUpper_v<MT2> ? j : j+1UL ) ) )
339  :( IsStrictlyLower_v<MT1> ? i : i+1UL ) )
340  :( ( IsUpper_v<MT2> )
341  ?( IsStrictlyUpper_v<MT2> ? j : j+1UL )
342  :( lhs_.columns() ) ) );
343 
// Empty range: return a default-constructed element.
344  if( begin >= end ) return ElementType();
345 
346  const size_t n( end - begin );
347 
// Inner product over the restricted index range only.
348  return subvector( row( lhs_, i, unchecked ), begin, n, unchecked ) *
349  subvector( column( rhs_, j, unchecked ), begin, n, unchecked );
350  }
351  else {
352  return row( lhs_, i, unchecked ) * column( rhs_, j, unchecked );
353  }
354  }
355  //**********************************************************************************************
356 
357  //**At function*********************************************************************************
365  inline ReturnType at( size_t i, size_t j ) const {
366  if( i >= lhs_.rows() ) {
367  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
368  }
369  if( j >= rhs_.columns() ) {
370  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
371  }
372  return (*this)(i,j);
373  }
374  //**********************************************************************************************
375 
376  //**Rows function*******************************************************************************
381  inline size_t rows() const noexcept {
382  return lhs_.rows();
383  }
384  //**********************************************************************************************
385 
386  //**Columns function****************************************************************************
391  inline size_t columns() const noexcept {
392  return rhs_.columns();
393  }
394  //**********************************************************************************************
395 
396  //**Left operand access*************************************************************************
401  inline LeftOperand leftOperand() const noexcept {
402  return lhs_;
403  }
404  //**********************************************************************************************
405 
406  //**Right operand access************************************************************************
411  inline RightOperand rightOperand() const noexcept {
412  return rhs_;
413  }
414  //**********************************************************************************************
415 
416  //**********************************************************************************************
422  template< typename T >
423  inline bool canAlias( const T* alias ) const noexcept {
424  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
425  }
426  //**********************************************************************************************
427 
428  //**********************************************************************************************
434  template< typename T >
435  inline bool isAliased( const T* alias ) const noexcept {
436  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
437  }
438  //**********************************************************************************************
439 
440  //**********************************************************************************************
445  inline bool isAligned() const noexcept {
446  return rhs_.isAligned();
447  }
448  //**********************************************************************************************
449 
450  //**********************************************************************************************
455  inline bool canSMPAssign() const noexcept {
456  return ( rows() * columns() >= SMP_SMATDMATMULT_THRESHOLD ) && !IsDiagonal_v<MT2>;
457  }
458  //**********************************************************************************************
459 
460  private:
461  //**Member variables****************************************************************************
464  //**********************************************************************************************
465 
466  //**Assignment to dense matrices****************************************************************
// Assignment of the sparse matrix-dense matrix product to a dense matrix.
//
// lhs is the target dense matrix; its dimensions must already match those of
// the expression (checked by internal assertions only). rhs is the
// multiplication expression to be assigned.
//
// Both operands are first bound/evaluated to A and B through serial(), then
// the work is delegated to the selectAssignKernel overload enabled for the
// concrete types.
// NOTE(review): LT and RT are declared outside this excerpt, and listing line
// 483 is absent from the body below; confirm both against the full header.
479  template< typename MT // Type of the target dense matrix
480  , bool SO > // Storage order of the target dense matrix
481  friend inline void assign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
482  {
484 
485  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
486  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
487 
488  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
489  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
490 
// Sanity checks: the evaluated operands must have the same dimensions as the
// original operands and must fit the target.
491  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
492  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
493  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
494  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
495  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
496  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
497 
498  SMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
499  }
501  //**********************************************************************************************
502 
503  //**Default assignment to dense matrices********************************************************
517  template< typename MT3 // Type of the left-hand side target matrix
518  , typename MT4 // Type of the left-hand side matrix operand
519  , typename MT5 > // Type of the right-hand side matrix operand
520  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
522  {
523  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
524 
525  reset( C );
526 
527  for( size_t jj=0UL; jj<B.columns(); jj+=block )
528  {
529  const size_t jtmp( min( jj+block, B.columns() ) );
530 
531  for( size_t i=0UL; i<A.rows(); ++i )
532  {
533  auto element( A.begin(i) );
534  const auto end( A.end(i) );
535 
536  for( ; element!=end; ++element )
537  {
538  const size_t i1( element->index() );
539 
540  if( IsDiagonal_v<MT5> )
541  {
542  C(i,i1) = element->value() * B(i1,i1);
543  }
544  else
545  {
546  const size_t jbegin( ( IsUpper_v<MT5> )
547  ?( ( UPP )
548  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
549  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
550  :( jj ) );
551  const size_t jend( ( IsLower_v<MT5> )
552  ?( ( SYM || HERM || LOW )
553  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
554  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
555  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
556 
557  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
558  continue;
559 
560  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
561 
562  for( size_t j=jbegin; j<jend; ++j ) {
563  if( isDefault( C(i,j) ) )
564  C(i,j) = element->value() * B(i1,j);
565  else
566  C(i,j) += element->value() * B(i1,j);
567  }
568  }
569  }
570  }
571  }
572 
573  if( SYM || HERM ) {
574  for( size_t i=0UL; i<A.rows(); ++i ) {
575  for( size_t j=i+1UL; j<B.columns(); ++j ) {
576  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
577  }
578  }
579  }
580  }
582  //**********************************************************************************************
583 
584  //**Optimized assignment to dense matrices******************************************************
// Optimized kernel for the assignment C = A * B.
//
// C is the target dense matrix, A the left-hand side sparse matrix operand and
// B the right-hand side dense matrix operand. The overload is enabled only
// when UseOptimizedKernel_v<MT3,MT4,MT5> holds.
//
// Algorithm: C is reset, then row i of C is built by accumulating scaled rows
// of B. The nonzeros of row i of A are consumed four at a time so that four
// rows of B are combined in one pass over C(i,:); leftover nonzeros (fewer
// than four) are handled one by one. The column loops are unrolled by four as
// well. If C is not row-major, columns are processed in strips of 64. For
// SYM/HERM results only columns j <= i are computed and the rest is mirrored
// afterwards (conjugated for HERM).
598  template< typename MT3 // Type of the left-hand side target matrix
599  , typename MT4 // Type of the left-hand side matrix operand
600  , typename MT5 > // Type of the right-hand side matrix operand
601  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
602  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
603  {
// Width of a column strip: all columns at once for a row-major target.
604  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
605 
606  reset( C );
607 
608  for( size_t jj=0UL; jj<B.columns(); jj+=block )
609  {
// End of the current column strip [jj,jtmp).
610  const size_t jtmp( min( jj+block, B.columns() ) );
611 
612  for( size_t i=0UL; i<A.rows(); ++i )
613  {
614  const auto end( A.end(i) );
615  auto element( A.begin(i) );
616 
// kpos: number of nonzeros in row i rounded down to a multiple of four.
617  const size_t nonzeros( A.nonZeros(i) );
618  const size_t kpos( nonzeros & size_t(-4) );
619  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
620 
// Main loop: four nonzeros (i1..i4 / v1..v4) per iteration.
621  for( size_t k=0UL; k<kpos; k+=4UL )
622  {
623  const size_t i1( element->index() );
624  const ET1 v1( element->value() );
625  ++element;
626  const size_t i2( element->index() );
627  const ET1 v2( element->value() );
628  ++element;
629  const size_t i3( element->index() );
630  const ET1 v3( element->value() );
631  ++element;
632  const size_t i4( element->index() );
633  const ET1 v4( element->value() );
634  ++element;
635 
636  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
637 
// Column range for the group: the lower bound is derived from the smallest
// row index i1 (upper B), the upper bound from the largest row index i4
// (lower B), so the range covers all four rows of B.
638  const size_t jbegin( ( IsUpper_v<MT5> )
639  ?( ( UPP )
640  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
641  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
642  :( UPP ? max(i,jj) : jj ) );
643  const size_t jend( ( IsLower_v<MT5> )
644  ?( ( SYM || HERM || LOW )
645  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
646  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
647  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
648 
// With structural restrictions the range may be empty.
649  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
650  continue;
651 
652  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
653 
// jpos: end of the part of [jbegin,jend) whose length is a multiple of four.
654  const size_t jnum( jend - jbegin );
655  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
656  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
657 
658  for( size_t j=jbegin; j<jpos; j+=4UL ) {
659  C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
660  C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
661  C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
662  C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
663  }
// Remaining (fewer than four) columns.
664  for( size_t j=jpos; j<jend; ++j ) {
665  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
666  }
667  }
668 
// Remainder loop: the nonzeros of row i not covered by the groups of four.
669  for( ; element!=end; ++element )
670  {
671  const size_t i1( element->index() );
672  const ET1 v1( element->value() );
673 
674  const size_t jbegin( ( IsUpper_v<MT5> )
675  ?( ( UPP )
676  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
677  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
678  :( UPP ? max(i,jj) : jj ) );
679  const size_t jend( ( IsLower_v<MT5> )
680  ?( ( SYM || HERM || LOW )
681  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
682  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
683  :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
684 
685  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
686  continue;
687 
688  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
689 
690  const size_t jnum( jend - jbegin );
691  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
692  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
693 
694  for( size_t j=jbegin; j<jpos; j+=4UL ) {
695  C(i,j ) += v1 * B(i1,j );
696  C(i,j+1UL) += v1 * B(i1,j+1UL);
697  C(i,j+2UL) += v1 * B(i1,j+2UL);
698  C(i,j+3UL) += v1 * B(i1,j+3UL);
699  }
700  for( size_t j=jpos; j<jend; ++j ) {
701  C(i,j) += v1 * B(i1,j);
702  }
703  }
704  }
705  }
706 
// Fill the elements above the diagonal from their mirrored counterparts.
707  if( SYM || HERM ) {
708  for( size_t i=0UL; i<A.rows(); ++i ) {
709  for( size_t j=i+1UL; j<B.columns(); ++j ) {
710  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
711  }
712  }
713  }
714  }
716  //**********************************************************************************************
717 
718  //**Vectorized assignment to dense matrices*****************************************************
// Vectorized (SIMD) kernel for the assignment C = A * B.
//
// C is the target dense matrix, A the left-hand side sparse matrix operand and
// B the right-hand side dense matrix operand. The overload is enabled only
// when UseVectorizedKernel_v<MT3,MT4,MT5> holds.
//
// Algorithm: C is reset, then row i of C is built by accumulating scaled rows
// of B. Each nonzero value of A is broadcast with set() and combined with
// packed loads from B and C. The nonzeros of a row are consumed four at a
// time, the leftover ones individually. Unlike the scalar kernels there is no
// column strip loop. For SYM/HERM results only columns j <= i are computed
// and the rest is mirrored afterwards (conjugated for HERM).
// NOTE(review): SIMDType is declared outside this excerpt.
732  template< typename MT3 // Type of the left-hand side target matrix
733  , typename MT4 // Type of the left-hand side matrix operand
734  , typename MT5 > // Type of the right-hand side matrix operand
735  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
736  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
737  {
// A scalar tail loop is needed unless both C and B are padded.
738  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
739 
740  reset( C );
741 
742  for( size_t i=0UL; i<A.rows(); ++i )
743  {
744  const auto end( A.end(i) );
745  auto element( A.begin(i) );
746 
// kpos: number of nonzeros in row i rounded down to a multiple of four.
747  const size_t nonzeros( A.nonZeros(i) );
748  const size_t kpos( nonzeros & size_t(-4) );
749  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
750 
// Main loop: four nonzeros (i1..i4 / v1..v4) per iteration.
751  for( size_t k=0UL; k<kpos; k+=4UL )
752  {
753  const size_t i1( element->index() );
754  const ET1 v1( element->value() );
755  ++element;
756  const size_t i2( element->index() );
757  const ET1 v2( element->value() );
758  ++element;
759  const size_t i3( element->index() );
760  const ET1 v3( element->value() );
761  ++element;
762  const size_t i4( element->index() );
763  const ET1 v4( element->value() );
764  ++element;
765 
766  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
767 
// Broadcast the four scalar values into SIMD vectors.
768  const SIMDType xmm1( set( v1 ) );
769  const SIMDType xmm2( set( v2 ) );
770  const SIMDType xmm3( set( v3 ) );
771  const SIMDType xmm4( set( v4 ) );
772 
// Start column, masked with size_t(-SIMDSIZE) so that it is rounded down to a
// multiple of SIMDSIZE (assumes SIMDSIZE is a power of two - TODO confirm).
773  const size_t jbegin( ( IsUpper_v<MT5> )
774  ?( ( IsStrictlyUpper_v<MT5> )
775  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
776  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
777  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
// End column, limited by the largest row index i4 for a lower B and by the
// diagonal for SYM/HERM/LOW results.
778  const size_t jend( ( IsLower_v<MT5> )
779  ?( ( IsStrictlyLower_v<MT5> )
780  ?( SYM || HERM || LOW ? min(i+1UL,i4) : i4 )
781  :( SYM || HERM || LOW ? min(i,i4)+1UL : i4+1UL ) )
782  :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
783  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
784 
// jpos: end of the SIMD loop; jend rounded down to a multiple of SIMDSIZE when
// a scalar tail is required, jend itself otherwise.
785  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
786  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
787 
788  size_t j( jbegin );
789 
790  for( ; j<jpos; j+=SIMDSIZE ) {
791  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
792  }
// Scalar tail for the columns not covered by full SIMD vectors.
793  for( ; remainder && j<jend; ++j ) {
794  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
795  }
796  }
797 
// Remainder loop: the nonzeros of row i not covered by the groups of four.
798  for( ; element!=end; ++element )
799  {
800  const size_t i1( element->index() );
801  const ET1 v1( element->value() );
802 
803  const SIMDType xmm1( set( v1 ) );
804 
805  const size_t jbegin( ( IsUpper_v<MT5> )
806  ?( ( IsStrictlyUpper_v<MT5> )
807  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
808  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
809  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
810  const size_t jend( ( IsLower_v<MT5> )
811  ?( ( IsStrictlyLower_v<MT5> )
812  ?( SYM || HERM || LOW ? min(i+1UL,i1) : i1 )
813  :( SYM || HERM || LOW ? min(i,i1)+1UL : i1+1UL ) )
814  :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
815  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
816 
817  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
818  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
819 
820  size_t j( jbegin );
821 
822  for( ; j<jpos; j+=SIMDSIZE ) {
823  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
824  }
825  for( ; remainder && j<jend; ++j ) {
826  C(i,j) += v1 * B(i1,j);
827  }
828  }
829  }
830 
// Fill the elements above the diagonal from their mirrored counterparts.
831  if( SYM || HERM ) {
832  for( size_t i=0UL; i<A.rows(); ++i ) {
833  for( size_t j=i+1UL; j<B.columns(); ++j ) {
834  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
835  }
836  }
837  }
838  }
840  //**********************************************************************************************
841 
842  //**Assignment to sparse matrices***************************************************************
// Assignment of the sparse matrix-dense matrix product to a sparse matrix.
//
// lhs is the target sparse matrix; its dimensions must already match those of
// the expression (checked by internal assertions only). rhs is the
// multiplication expression to be assigned.
//
// The product is first evaluated into a temporary (OppositeType if SO is
// true, ResultType otherwise); the temporary is then passed through the
// ForwardFunctor and assigned to the target.
// NOTE(review): listing lines 859 and 863-868 are absent from the body below;
// confirm against the full header what they contained.
855  template< typename MT // Type of the target sparse matrix
856  , bool SO > // Storage order of the target sparse matrix
857  friend inline void assign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
858  {
860 
861  using TmpType = If_t< SO, OppositeType, ResultType >;
862 
869 
870  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
871  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
872 
873  const ForwardFunctor fwd;
874 
// Evaluate the whole expression into the temporary, then assign it.
875  const TmpType tmp( serial( rhs ) );
876  assign( ~lhs, fwd( tmp ) );
877  }
879  //**********************************************************************************************
880 
881  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the sparse matrix-dense matrix product to a dense
// matrix (lhs += A * B).
//
// lhs is the target dense matrix; its dimensions must already match those of
// the expression (checked by internal assertions only). rhs is the
// multiplication expression to be added.
//
// Both operands are first bound/evaluated to A and B through serial(), then
// the work is delegated to the selectAddAssignKernel overload enabled for the
// concrete types.
// NOTE(review): LT and RT are declared outside this excerpt, and listing line
// 898 is absent from the body below; confirm both against the full header.
894  template< typename MT // Type of the target dense matrix
895  , bool SO > // Storage order of the target dense matrix
896  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
897  {
899 
900  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
901  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
902 
903  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
904  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
905 
// Sanity checks: the evaluated operands must have the same dimensions as the
// original operands and must fit the target.
906  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
907  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
908  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
909  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
910  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
911  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
912 
913  SMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
914  }
916  //**********************************************************************************************
917 
918  //**Default addition assignment to dense matrices***********************************************
// Default kernel for the addition assignment C += A * B.
//
// C is the target dense matrix, A the left-hand side sparse matrix operand and
// B the right-hand side dense matrix operand. The overload is enabled only
// when UseDefaultKernel_v<MT3,MT4,MT5> holds.
//
// Algorithm: for every nonzero A(i,i1) the row i1 of B, scaled by that value,
// is added to row i of C; the column loop is unrolled by four. C is not reset
// and no symmetric mirroring takes place, and only the LOW/UPP flags (not
// SYM/HERM) narrow the column range. If C is not row-major (and B is not
// diagonal) the columns are processed in strips of 64.
932  template< typename MT3 // Type of the left-hand side target matrix
933  , typename MT4 // Type of the left-hand side matrix operand
934  , typename MT5 > // Type of the right-hand side matrix operand
935  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
936  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
937  {
// Width of a column strip: all columns at once unless C is column-major.
938  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
939 
940  for( size_t jj=0UL; jj<B.columns(); jj+=block )
941  {
// End of the current column strip [jj,jtmp).
942  const size_t jtmp( min( jj+block, B.columns() ) );
943 
944  for( size_t i=0UL; i<A.rows(); ++i )
945  {
946  const auto end( A.end(i) );
947  auto element( A.begin(i) );
948 
949  for( ; element!=end; ++element )
950  {
951  const size_t i1( element->index() );
952 
953  if( IsDiagonal_v<MT5> )
954  {
// Row i1 of a diagonal B has a single entry B(i1,i1).
955  C(i,i1) += element->value() * B(i1,i1);
956  }
957  else
958  {
// Column range [jbegin,jend) of row i1 of B that has to be visited.
959  const size_t jbegin( ( IsUpper_v<MT5> )
960  ?( ( UPP )
961  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
962  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
963  :( jj ) );
964  const size_t jend( ( IsLower_v<MT5> )
965  ?( ( LOW )
966  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
967  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
968  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
969 
// With structural restrictions the range may be empty.
970  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
971  continue;
972 
973  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
974 
// jpos: end of the part of [jbegin,jend) whose length is a multiple of four.
975  const size_t jnum( jend - jbegin );
976  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
977  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
978 
979  for( size_t j=jbegin; j<jpos; j+=4UL ) {
980  C(i,j ) += element->value() * B(i1,j );
981  C(i,j+1UL) += element->value() * B(i1,j+1UL);
982  C(i,j+2UL) += element->value() * B(i1,j+2UL);
983  C(i,j+3UL) += element->value() * B(i1,j+3UL);
984  }
// Remaining (fewer than four) columns.
985  for( size_t j=jpos; j<jend; ++j ) {
986  C(i,j) += element->value() * B(i1,j);
987  }
988  }
989  }
990  }
991  }
992  }
994  //**********************************************************************************************
995 
996  //**Optimized addition assignment to dense matrices*********************************************
// Optimized kernel for the addition assignment C += A * B.
//
// C is the target dense matrix, A the left-hand side sparse matrix operand and
// B the right-hand side dense matrix operand. The overload is enabled only
// when UseOptimizedKernel_v<MT3,MT4,MT5> holds.
//
// Algorithm: scaled rows of B are added to row i of C. The nonzeros of row i
// of A are consumed four at a time so that four rows of B are combined in one
// pass over C(i,:); leftover nonzeros are handled one by one. The column loops
// are unrolled by four as well. C is not reset and no symmetric mirroring
// takes place, and only the LOW/UPP flags (not SYM/HERM) narrow the column
// range. If C is not row-major, columns are processed in strips of 64.
1010  template< typename MT3 // Type of the left-hand side target matrix
1011  , typename MT4 // Type of the left-hand side matrix operand
1012  , typename MT5 > // Type of the right-hand side matrix operand
1013  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1014  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1015  {
// Width of a column strip: all columns at once for a row-major target.
1016  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1017 
1018  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1019  {
// End of the current column strip [jj,jtmp).
1020  const size_t jtmp( min( jj+block, B.columns() ) );
1021 
1022  for( size_t i=0UL; i<A.rows(); ++i )
1023  {
1024  const auto end( A.end(i) );
1025  auto element( A.begin(i) );
1026 
// kpos: number of nonzeros in row i rounded down to a multiple of four.
1027  const size_t nonzeros( A.nonZeros(i) );
1028  const size_t kpos( nonzeros & size_t(-4) );
1029  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1030 
// Main loop: four nonzeros (i1..i4 / v1..v4) per iteration.
1031  for( size_t k=0UL; k<kpos; k+=4UL )
1032  {
1033  const size_t i1( element->index() );
1034  const ET1 v1( element->value() );
1035  ++element;
1036  const size_t i2( element->index() );
1037  const ET1 v2( element->value() );
1038  ++element;
1039  const size_t i3( element->index() );
1040  const ET1 v3( element->value() );
1041  ++element;
1042  const size_t i4( element->index() );
1043  const ET1 v4( element->value() );
1044  ++element;
1045 
1046  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1047 
// Column range for the group: the lower bound is derived from the smallest
// row index i1 (upper B), the upper bound from the largest row index i4
// (lower B), so the range covers all four rows of B.
1048  const size_t jbegin( ( IsUpper_v<MT5> )
1049  ?( ( UPP )
1050  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1051  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1052  :( UPP ? max(i,jj) : jj ) );
1053  const size_t jend( ( IsLower_v<MT5> )
1054  ?( ( LOW )
1055  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1056  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1057  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1058 
// With structural restrictions the range may be empty.
1059  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1060  continue;
1061 
1062  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1063 
// jpos: end of the part of [jbegin,jend) whose length is a multiple of four.
1064  const size_t jnum( jend - jbegin );
1065  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1066  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1067 
1068  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1069  C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1070  C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1071  C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1072  C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1073  }
// Remaining (fewer than four) columns.
1074  for( size_t j=jpos; j<jend; ++j ) {
1075  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1076  }
1077  }
1078 
// Remainder loop: the nonzeros of row i not covered by the groups of four.
1079  for( ; element!=end; ++element )
1080  {
1081  const size_t i1( element->index() );
1082  const ET1 v1( element->value() );
1083 
1084  const size_t jbegin( ( IsUpper_v<MT5> )
1085  ?( ( UPP )
1086  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1087  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1088  :( UPP ? max(i,jj) : jj ) );
1089  const size_t jend( ( IsLower_v<MT5> )
1090  ?( ( LOW )
1091  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1092  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1093  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1094 
1095  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1096  continue;
1097 
1098  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1099 
1100  const size_t jnum( jend - jbegin );
1101  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1102  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1103 
1104  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1105  C(i,j ) += v1 * B(i1,j );
1106  C(i,j+1UL) += v1 * B(i1,j+1UL);
1107  C(i,j+2UL) += v1 * B(i1,j+2UL);
1108  C(i,j+3UL) += v1 * B(i1,j+3UL);
1109  }
1110  for( size_t j=jpos; j<jend; ++j ) {
1111  C(i,j) += v1 * B(i1,j);
1112  }
1113  }
1114  }
1115  }
1116  }
1118  //**********************************************************************************************
1119 
1120  //**Vectorized addition assignment to dense matrices********************************************
// Vectorized addition-assignment kernel: C += A * B.
// Participates in overload resolution only when UseVectorizedKernel_v<MT3,MT4,MT5> holds.
//   C - target matrix, updated in place through load()/store() and element access
//   A - left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B - right-hand side operand, read through load() and element access
// The elements of each row of A are consumed in groups of four, followed by a loop that
// handles the remaining elements one at a time.
1134  template< typename MT3 // Type of the left-hand side target matrix
1135  , typename MT4 // Type of the left-hand side matrix operand
1136  , typename MT5 > // Type of the right-hand side matrix operand
1137  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1138  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1139  {
// A scalar tail loop is needed unless both C and B are padded.
1140  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1141
1142  for( size_t i=0UL; i<A.rows(); ++i )
1143  {
1144  const auto end( A.end(i) );
1145  auto element( A.begin(i) );
1146
// kpos: number of elements in row i rounded down to a multiple of four.
1147  const size_t nonzeros( A.nonZeros(i) );
1148  const size_t kpos( nonzeros & size_t(-4) );
1149  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1150
// Four-way unrolled part: each iteration consumes four consecutive elements of row i.
1151  for( size_t k=0UL; k<kpos; k+=4UL )
1152  {
// Read index/value of four elements; the iterator is advanced after each read.
1153  const size_t i1( element->index() );
1154  const ET1 v1( element->value() );
1155  ++element;
1156  const size_t i2( element->index() );
1157  const ET1 v2( element->value() );
1158  ++element;
1159  const size_t i3( element->index() );
1160  const ET1 v3( element->value() );
1161  ++element;
1162  const size_t i4( element->index() );
1163  const ET1 v4( element->value() );
1164  ++element;
1165
// The bounds below rely on i1 being the smallest and i4 the largest of the four indices.
1166  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1167
// NOTE(review): set() is defined outside this chunk; presumably it broadcasts the scalar
// into every lane of a SIMDType - confirm.
1168  const SIMDType xmm1( set( v1 ) );
1169  const SIMDType xmm2( set( v2 ) );
1170  const SIMDType xmm3( set( v3 ) );
1171  const SIMDType xmm4( set( v4 ) );
1172
// First column to process. Narrowed by i1 when B is (strictly) upper and by i when UPP is
// set; the size_t(-SIMDSIZE) mask rounds the value down to a multiple of SIMDSIZE
// (assuming SIMDSIZE is a power of two).
1173  const size_t jbegin( ( IsUpper_v<MT5> )
1174  ?( ( IsStrictlyUpper_v<MT5> )
1175  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1176  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1177  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
// One-past-the-last column to process. Narrowed by i4 when B is (strictly) lower and by i
// when LOW is set; otherwise all columns of B are covered.
1178  const size_t jend( ( IsLower_v<MT5> )
1179  ?( ( IsStrictlyLower_v<MT5> )
1180  ?( LOW ? min(i+1UL,i4) : i4 )
1181  :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1182  :( LOW ? i+1UL : B.columns() ) );
1183  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1184
// End of the SIMD-only range: jend rounded down to a multiple of SIMDSIZE when a scalar
// tail is required, jend itself otherwise.
1185  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1186  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1187
1188  size_t j( jbegin );
1189
// SIMD part: SIMDSIZE columns of row i of C are updated per iteration.
1190  for( ; j<jpos; j+=SIMDSIZE ) {
1191  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1192  }
// Scalar tail: only entered when remainder is true.
1193  for( ; remainder && j<jend; ++j ) {
1194  C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1195  }
1196  }
1197
// Remaining elements of row i (those not covered by the four-way loop), one at a time.
1198  for( ; element!=end; ++element )
1199  {
1200  const size_t i1( element->index() );
1201  const ET1 v1( element->value() );
1202
1203  const SIMDType xmm1( set( v1 ) );
1204
// Same bounds as above, but with a single index i1 limiting both ends.
1205  const size_t jbegin( ( IsUpper_v<MT5> )
1206  ?( ( IsStrictlyUpper_v<MT5> )
1207  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1208  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1209  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
1210  const size_t jend( ( IsLower_v<MT5> )
1211  ?( ( IsStrictlyLower_v<MT5> )
1212  ?( LOW ? min(i+1UL,i1) : i1 )
1213  :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1214  :( LOW ? i+1UL : B.columns() ) );
1215  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1216
1217  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1218  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1219
1220  size_t j( jbegin );
1221
1222  for( ; j<jpos; j+=SIMDSIZE ) {
1223  C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1224  }
1225  for( ; remainder && j<jend; ++j ) {
1226  C(i,j) += v1 * B(i1,j);
1227  }
1228  }
1229  }
1230  }
1232  //**********************************************************************************************
1233 
1234  //**Addition assignment to sparse matrices******************************************************
1235  // No special implementation for the addition assignment to sparse matrices.
1236  //**********************************************************************************************
1237 
1238  //**Subtraction assignment to dense matrices****************************************************
1251  template< typename MT // Type of the target dense matrix
1252  , bool SO > // Storage order of the target dense matrix
1253  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1254  {
1256 
1257  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1258  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1259 
1260  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
1261  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1262 
1263  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1264  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1265  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1266  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1267  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1268  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1269 
1270  SMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1271  }
1273  //**********************************************************************************************
1274 
1275  //**Default subtraction assignment to dense matrices********************************************
1289  template< typename MT3 // Type of the left-hand side target matrix
1290  , typename MT4 // Type of the left-hand side matrix operand
1291  , typename MT5 > // Type of the right-hand side matrix operand
1292  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1293  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
1294  {
1295  const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
1296 
1297  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1298  {
1299  const size_t jtmp( min( jj+block, B.columns() ) );
1300 
1301  for( size_t i=0UL; i<A.rows(); ++i )
1302  {
1303  const auto end( A.end(i) );
1304  auto element( A.begin(i) );
1305 
1306  for( ; element!=end; ++element )
1307  {
1308  const size_t i1( element->index() );
1309 
1310  if( IsDiagonal_v<MT5> )
1311  {
1312  C(i,i1) -= element->value() * B(i1,i1);
1313  }
1314  else
1315  {
1316  const size_t jbegin( ( IsUpper_v<MT5> )
1317  ?( ( UPP )
1318  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1319  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1320  :( jj ) );
1321  const size_t jend( ( IsLower_v<MT5> )
1322  ?( ( LOW )
1323  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1324  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1325  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1326 
1327  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1328  continue;
1329 
1330  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1331 
1332  const size_t jnum( jend - jbegin );
1333  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1334  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1335 
1336  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1337  C(i,j ) -= element->value() * B(i1,j );
1338  C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1339  C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1340  C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1341  }
1342  for( size_t j=jpos; j<jend; ++j ) {
1343  C(i,j) -= element->value() * B(i1,j);
1344  }
1345  }
1346  }
1347  }
1348  }
1349  }
1351  //**********************************************************************************************
1352 
1353  //**Optimized subtraction assignment to dense matrices******************************************
// Optimized subtraction-assignment kernel: C -= A * B.
// Participates in overload resolution only when UseOptimizedKernel_v<MT3,MT4,MT5> holds.
//   C - target matrix, updated in place through element access
//   A - left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B - right-hand side operand, read through element access
// The elements of each row of A are consumed in groups of four (with their values copied
// into local ET1 variables), followed by a loop for the remaining elements. The column
// loops are themselves unrolled four times.
1367  template< typename MT3 // Type of the left-hand side target matrix
1368  , typename MT4 // Type of the left-hand side matrix operand
1369  , typename MT5 > // Type of the right-hand side matrix operand
1370  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1371  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1372  {
// All columns in one pass for row-major targets, otherwise column blocks of 64.
1373  const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1374
1375  for( size_t jj=0UL; jj<B.columns(); jj+=block )
1376  {
// One-past-the-last column of the current column block.
1377  const size_t jtmp( min( jj+block, B.columns() ) );
1378
1379  for( size_t i=0UL; i<A.rows(); ++i )
1380  {
1381  const auto end( A.end(i) );
1382  auto element( A.begin(i) );
1383
// kpos: number of elements in row i rounded down to a multiple of four.
1384  const size_t nonzeros( A.nonZeros(i) );
1385  const size_t kpos( nonzeros & size_t(-4) );
1386  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1387
// Four-way unrolled part: each iteration consumes four consecutive elements of row i.
1388  for( size_t k=0UL; k<kpos; k+=4UL )
1389  {
// Read index/value of four elements; the iterator is advanced after each read.
1390  const size_t i1( element->index() );
1391  const ET1 v1( element->value() );
1392  ++element;
1393  const size_t i2( element->index() );
1394  const ET1 v2( element->value() );
1395  ++element;
1396  const size_t i3( element->index() );
1397  const ET1 v3( element->value() );
1398  ++element;
1399  const size_t i4( element->index() );
1400  const ET1 v4( element->value() );
1401  ++element;
1402
// The bounds below rely on i1 being the smallest and i4 the largest of the four indices.
1403  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1404
// First column to update within the current block: narrowed by i1 when B is (strictly)
// upper and by i when UPP is set.
1405  const size_t jbegin( ( IsUpper_v<MT5> )
1406  ?( ( UPP )
1407  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1408  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1409  :( UPP ? max(i,jj) : jj ) );
// One-past-the-last column to update: narrowed by i4 when B is (strictly) lower and by i
// when LOW is set.
1410  const size_t jend( ( IsLower_v<MT5> )
1411  ?( ( LOW )
1412  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1413  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1414  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1415
// With structured operands the narrowed range can be empty or inverted; skip this group.
1416  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1417  continue;
1418
1419  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1420
// jpos: end of the part of [jbegin,jend) whose length is a multiple of four.
1421  const size_t jnum( jend - jbegin );
1422  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1423  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1424
// Four columns per iteration, each combining the four rows i1..i4 of B.
1425  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1426  C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1427  C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1428  C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1429  C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1430  }
// Remaining columns of the range, one at a time.
1431  for( size_t j=jpos; j<jend; ++j ) {
1432  C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1433  }
1434  }
1435
// Remaining elements of row i (those not covered by the four-way loop), one at a time.
1436  for( ; element!=end; ++element )
1437  {
1438  const size_t i1( element->index() );
1439  const ET1 v1( element->value() );
1440
// Same bounds as above, but with a single index i1 limiting both ends.
1441  const size_t jbegin( ( IsUpper_v<MT5> )
1442  ?( ( UPP )
1443  ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1444  :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1445  :( UPP ? max(i,jj) : jj ) );
1446  const size_t jend( ( IsLower_v<MT5> )
1447  ?( ( LOW )
1448  ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1449  :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1450  :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1451
1452  if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1453  continue;
1454
1455  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1456
1457  const size_t jnum( jend - jbegin );
1458  const size_t jpos( jbegin + ( jnum & size_t(-4) ) );
1459  BLAZE_INTERNAL_ASSERT( ( jbegin + jnum - ( jnum % 4UL ) ) == jpos, "Invalid end calculation" );
1460
1461  for( size_t j=jbegin; j<jpos; j+=4UL ) {
1462  C(i,j ) -= v1 * B(i1,j );
1463  C(i,j+1UL) -= v1 * B(i1,j+1UL);
1464  C(i,j+2UL) -= v1 * B(i1,j+2UL);
1465  C(i,j+3UL) -= v1 * B(i1,j+3UL);
1466  }
1467  for( size_t j=jpos; j<jend; ++j ) {
1468  C(i,j) -= v1 * B(i1,j);
1469  }
1470  }
1471  }
1472  }
1473  }
1475  //**********************************************************************************************
1476 
1477  //**Vectorized subtraction assignment to dense matrices*****************************************
// Vectorized subtraction-assignment kernel: C -= A * B.
// Participates in overload resolution only when UseVectorizedKernel_v<MT3,MT4,MT5> holds.
//   C - target matrix, updated in place through load()/store() and element access
//   A - left-hand side operand, traversed row by row via begin(i)/end(i)/nonZeros(i)
//   B - right-hand side operand, read through load() and element access
// The elements of each row of A are consumed in groups of four, followed by a loop that
// handles the remaining elements one at a time.
1491  template< typename MT3 // Type of the left-hand side target matrix
1492  , typename MT4 // Type of the left-hand side matrix operand
1493  , typename MT5 > // Type of the right-hand side matrix operand
1494  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1495  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1496  {
// A scalar tail loop is needed unless both C and B are padded.
1497  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1498
1499  for( size_t i=0UL; i<A.rows(); ++i )
1500  {
1501  const auto end( A.end(i) );
1502  auto element( A.begin(i) );
1503
// kpos: number of elements in row i rounded down to a multiple of four.
1504  const size_t nonzeros( A.nonZeros(i) );
1505  const size_t kpos( nonzeros & size_t(-4) );
1506  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1507
// Four-way unrolled part: each iteration consumes four consecutive elements of row i.
1508  for( size_t k=0UL; k<kpos; k+=4UL )
1509  {
// Read index/value of four elements; the iterator is advanced after each read.
1510  const size_t i1( element->index() );
1511  const ET1 v1( element->value() );
1512  ++element;
1513  const size_t i2( element->index() );
1514  const ET1 v2( element->value() );
1515  ++element;
1516  const size_t i3( element->index() );
1517  const ET1 v3( element->value() );
1518  ++element;
1519  const size_t i4( element->index() );
1520  const ET1 v4( element->value() );
1521  ++element;
1522
// The bounds below rely on i1 being the smallest and i4 the largest of the four indices.
1523  BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1524
// NOTE(review): set() is defined outside this chunk; presumably it broadcasts the scalar
// into every lane of a SIMDType - confirm.
1525  const SIMDType xmm1( set( v1 ) );
1526  const SIMDType xmm2( set( v2 ) );
1527  const SIMDType xmm3( set( v3 ) );
1528  const SIMDType xmm4( set( v4 ) );
1529
// First column to process. Narrowed by i1 when B is (strictly) upper and by i when UPP is
// set; the size_t(-SIMDSIZE) mask rounds the value down to a multiple of SIMDSIZE
// (assuming SIMDSIZE is a power of two).
1530  const size_t jbegin( ( IsUpper_v<MT5> )
1531  ?( ( IsStrictlyUpper_v<MT5> )
1532  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1533  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1534  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
// One-past-the-last column to process. Narrowed by i4 when B is (strictly) lower and by i
// when LOW is set; otherwise all columns of B are covered.
1535  const size_t jend( ( IsLower_v<MT5> )
1536  ?( ( IsStrictlyLower_v<MT5> )
1537  ?( LOW ? min(i+1UL,i4) : i4 )
1538  :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1539  :( LOW ? i+1UL : B.columns() ) );
1540  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1541
// End of the SIMD-only range: jend rounded down to a multiple of SIMDSIZE when a scalar
// tail is required, jend itself otherwise.
1542  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1543  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1544
1545  size_t j( jbegin );
1546
// SIMD part: SIMDSIZE columns of row i of C are updated per iteration.
1547  for( ; j<jpos; j+=SIMDSIZE ) {
1548  C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1549  }
// Scalar tail: only entered when remainder is true.
1550  for( ; remainder && j<jend; ++j ) {
1551  C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1552  }
1553  }
1554
// Remaining elements of row i (those not covered by the four-way loop), one at a time.
1555  for( ; element!=end; ++element )
1556  {
1557  const size_t i1( element->index() );
1558  const ET1 v1( element->value() );
1559
1560  const SIMDType xmm1( set( v1 ) );
1561
// Same bounds as above, but with a single index i1 limiting both ends.
1562  const size_t jbegin( ( IsUpper_v<MT5> )
1563  ?( ( IsStrictlyUpper_v<MT5> )
1564  ?( ( UPP ? max(i,i1+1UL) : i1+1UL ) & size_t(-SIMDSIZE) )
1565  :( ( UPP ? max(i,i1) : i1 ) & size_t(-SIMDSIZE) ) )
1566  :( UPP ? ( i & size_t(-SIMDSIZE) ) : 0UL ) );
1567  const size_t jend( ( IsLower_v<MT5> )
1568  ?( ( IsStrictlyLower_v<MT5> )
1569  ?( LOW ? min(i+1UL,i1) : i1 )
1570  :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1571  :( LOW ? i+1UL : B.columns() ) );
1572  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1573
1574  const size_t jpos( remainder ? ( jend & size_t(-SIMDSIZE) ) : jend );
1575  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % (SIMDSIZE) ) ) == jpos, "Invalid end calculation" );
1576
1577  size_t j( jbegin );
1578
1579  for( ; j<jpos; j+=SIMDSIZE ) {
1580  C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1581  }
1582  for( ; remainder && j<jend; ++j ) {
1583  C(i,j) -= v1 * B(i1,j);
1584  }
1585  }
1586  }
1587  }
1589  //**********************************************************************************************
1590 
1591  //**Subtraction assignment to sparse matrices***************************************************
1592  // No special implementation for the subtraction assignment to sparse matrices.
1593  //**********************************************************************************************
1594 
1595  //**Schur product assignment to dense matrices**************************************************
1608  template< typename MT // Type of the target dense matrix
1609  , bool SO > // Storage order of the target dense matrix
1610  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1611  {
1613 
1617 
1618  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1619  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1620 
1621  const ResultType tmp( serial( rhs ) );
1622  schurAssign( ~lhs, tmp );
1623  }
1625  //**********************************************************************************************
1626 
1627  //**Schur product assignment to sparse matrices*************************************************
1628  // No special implementation for the Schur product assignment to sparse matrices.
1629  //**********************************************************************************************
1630 
1631  //**Multiplication assignment to dense matrices*************************************************
1632  // No special implementation for the multiplication assignment to dense matrices.
1633  //**********************************************************************************************
1634 
1635  //**Multiplication assignment to sparse matrices************************************************
1636  // No special implementation for the multiplication assignment to sparse matrices.
1637  //**********************************************************************************************
1638 
1639  //**SMP assignment to dense matrices************************************************************
1654  template< typename MT // Type of the target dense matrix
1655  , bool SO > // Storage order of the target dense matrix
1656  friend inline auto smpAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1657  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1658  {
1660 
1661  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1662  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1663 
1664  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1665  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1666 
1667  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1668  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1669  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1670  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1671  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1672  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1673 
1674  smpAssign( ~lhs, A * B );
1675  }
1677  //**********************************************************************************************
1678 
1679  //**SMP assignment to sparse matrices***********************************************************
1694  template< typename MT // Type of the target sparse matrix
1695  , bool SO > // Storage order of the target sparse matrix
1696  friend inline auto smpAssign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1697  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1698  {
1700 
1701  using TmpType = If_t< SO, OppositeType, ResultType >;
1702 
1709 
1710  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1711  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1712 
1713  const ForwardFunctor fwd;
1714 
1715  const TmpType tmp( rhs );
1716  smpAssign( ~lhs, fwd( tmp ) );
1717  }
1719  //**********************************************************************************************
1720 
1721  //**SMP addition assignment to dense matrices***************************************************
1737  template< typename MT // Type of the target dense matrix
1738  , bool SO > // Storage order of the target dense matrix
1739  friend inline auto smpAddAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1740  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1741  {
1743 
1744  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1745  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1746 
1747  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1748  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1749 
1750  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1751  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1752  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1753  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1754  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1755  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1756 
1757  smpAddAssign( ~lhs, A * B );
1758  }
1760  //**********************************************************************************************
1761 
1762  //**SMP addition assignment to sparse matrices**************************************************
1763  // No special implementation for the SMP addition assignment to sparse matrices.
1764  //**********************************************************************************************
1765 
1766  //**SMP subtraction assignment to dense matrices************************************************
1782  template< typename MT // Type of the target dense matrix
1783  , bool SO > // Storage order of the target dense matrix
1784  friend inline auto smpSubAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1785  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1786  {
1788 
1789  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1790  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1791 
1792  LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1793  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1794 
1795  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1796  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1797  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1798  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1799  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1800  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1801 
1802  smpSubAssign( ~lhs, A * B );
1803  }
1805  //**********************************************************************************************
1806 
1807  //**SMP subtraction assignment to sparse matrices***********************************************
1808  // No special implementation for the SMP subtraction assignment to sparse matrices.
1809  //**********************************************************************************************
1810 
1811  //**SMP Schur product assignment to dense matrices**********************************************
1824  template< typename MT // Type of the target dense matrix
1825  , bool SO > // Storage order of the target dense matrix
1826  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1827  {
1829 
1833 
1834  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1835  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1836 
1837  const ResultType tmp( rhs );
1838  smpSchurAssign( ~lhs, tmp );
1839  }
1841  //**********************************************************************************************
1842 
1843  //**SMP Schur product assignment to sparse matrices*********************************************
1844  // No special implementation for the SMP Schur product assignment to sparse matrices.
1845  //**********************************************************************************************
1846 
1847  //**SMP multiplication assignment to dense matrices*********************************************
1848  // No special implementation for the SMP multiplication assignment to dense matrices.
1849  //**********************************************************************************************
1850 
1851  //**SMP multiplication assignment to sparse matrices********************************************
1852  // No special implementation for the SMP multiplication assignment to sparse matrices.
1853  //**********************************************************************************************
1854 
1855  //**Compile time checks*************************************************************************
1864  //**********************************************************************************************
1865 };
1866 //*************************************************************************************************
1867 
1868 
1869 
1870 
1871 //=================================================================================================
1872 //
1873 // GLOBAL BINARY ARITHMETIC OPERATORS
1874 //
1875 //=================================================================================================
1876 
1877 //*************************************************************************************************
1890 template< typename MT1 // Type of the left-hand side dense matrix
1891  , typename MT2 // Type of the right-hand side sparse matrix
1892  , DisableIf_t< ( IsIdentity_v<MT1> &&
1893  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > ) ||
1894  IsZero_v<MT1> >* = nullptr >
1895 inline const SMatDMatMultExpr<MT1,MT2,false,false,false,false>
1896  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1897 {
1899 
1900  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1901 
1902  return SMatDMatMultExpr<MT1,MT2,false,false,false,false>( ~lhs, ~rhs );
1903 }
1905 //*************************************************************************************************
1906 
1907 
1908 //*************************************************************************************************
1922 template< typename MT1 // Type of the left-hand side sparse matrix
1923  , typename MT2 // Type of the right-hand side dense matrix
1924  , EnableIf_t< IsIdentity_v<MT1> &&
1925  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > >* = nullptr >
1926 inline const MT2&
1927  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1928 {
1930 
1931  UNUSED_PARAMETER( lhs );
1932 
1933  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1934 
1935  return (~rhs);
1936 }
1938 //*************************************************************************************************
1939 
1940 
1941 //*************************************************************************************************
1954 template< typename MT1 // Type of the left-hand side dense matrix
1955  , typename MT2 // Type of the right-hand side sparse matrix
1956  , EnableIf_t< IsZero_v<MT1> >* = nullptr >
1957 inline decltype(auto)
1958  smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1959 {
1961 
1962  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1963 
1964  using ReturnType = const MultTrait_t< ResultType_t<MT1>, ResultType_t<MT2> >;
1965 
1968 
1969  return ReturnType( (~lhs).rows(), (~rhs).columns() );
1970 }
1972 //*************************************************************************************************
1973 
1974 
1975 //*************************************************************************************************
2004 template< typename MT1 // Type of the left-hand side sparse matrix
2005  , typename MT2 > // Type of the right-hand side dense matrix
2006 inline decltype(auto)
2007  operator*( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
2008 {
2010 
2011  if( (~lhs).columns() != (~rhs).rows() ) {
2012  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
2013  }
2014 
2015  return smatdmatmult( ~lhs, ~rhs );
2016 }
2017 //*************************************************************************************************
2018 
2019 
2020 
2021 
2022 //=================================================================================================
2023 //
2024 // GLOBAL FUNCTIONS
2025 //
2026 //=================================================================================================
2027 
2028 //*************************************************************************************************
2052 template< typename MT1 // Type of the left-hand side sparse matrix
2053  , typename MT2 // Type of the right-hand side dense matrix
2054  , bool SF // Symmetry flag
2055  , bool HF // Hermitian flag
2056  , bool LF // Lower flag
2057  , bool UF > // Upper flag
2058 inline decltype(auto) declsym( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2059 {
2061 
2062  if( !isSquare( dm ) ) {
2063  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
2064  }
2065 
2066  using ReturnType = const SMatDMatMultExpr<MT1,MT2,true,HF,LF,UF>;
2067  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2068 }
2070 //*************************************************************************************************
2071 
2072 
2073 //*************************************************************************************************
// Declares the given sparse matrix/dense matrix multiplication expression as Hermitian.
// Throws through BLAZE_THROW_INVALID_ARGUMENT when isSquare( dm ) is false. Otherwise
// returns a new const expression built from the same left and right operands with the
// Hermitian flag set to true; the symmetry, lower and upper flags are carried over.
// NOTE(review): the listing jumps from 2104 to 2106, so a statement of the original
// source may be missing from this view.
2097 template< typename MT1 // Type of the left-hand side sparse matrix
2098  , typename MT2 // Type of the right-hand side dense matrix
2099  , bool SF // Symmetry flag
2100  , bool HF // Hermitian flag
2101  , bool LF // Lower flag
2102  , bool UF > // Upper flag
2103 inline decltype(auto) declherm( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2104 {
2106 
2107  if( !isSquare( dm ) ) { // only square expressions are accepted
2108  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
2109  }
2110 
2111  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,true,LF,UF>; // HF replaced by true
2112  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2113 }
2115 //*************************************************************************************************
2116 
2117 
2118 //*************************************************************************************************
// Declares the given sparse matrix/dense matrix multiplication expression as lower.
// Throws through BLAZE_THROW_INVALID_ARGUMENT when isSquare( dm ) is false. Otherwise
// returns a new const expression built from the same left and right operands with the
// lower flag set to true; the symmetry, Hermitian and upper flags are carried over.
// NOTE(review): the listing jumps from 2149 to 2151, so a statement of the original
// source may be missing from this view.
2142 template< typename MT1 // Type of the left-hand side sparse matrix
2143  , typename MT2 // Type of the right-hand side dense matrix
2144  , bool SF // Symmetry flag
2145  , bool HF // Hermitian flag
2146  , bool LF // Lower flag
2147  , bool UF > // Upper flag
2148 inline decltype(auto) decllow( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2149 {
2151 
2152  if( !isSquare( dm ) ) { // only square expressions are accepted
2153  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
2154  }
2155 
2156  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,UF>; // LF replaced by true
2157  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2158 }
2160 //*************************************************************************************************
2161 
2162 
2163 //*************************************************************************************************
// Declares the given sparse matrix/dense matrix multiplication expression as upper.
// Throws through BLAZE_THROW_INVALID_ARGUMENT when isSquare( dm ) is false. Otherwise
// returns a new const expression built from the same left and right operands with the
// upper flag set to true; the symmetry, Hermitian and lower flags are carried over.
// NOTE(review): the listing jumps from 2194 to 2196, so a statement of the original
// source may be missing from this view.
2187 template< typename MT1 // Type of the left-hand side sparse matrix
2188  , typename MT2 // Type of the right-hand side dense matrix
2189  , bool SF // Symmetry flag
2190  , bool HF // Hermitian flag
2191  , bool LF // Lower flag
2192  , bool UF > // Upper flag
2193 inline decltype(auto) declupp( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2194 {
2196 
2197  if( !isSquare( dm ) ) { // only square expressions are accepted
2198  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
2199  }
2200 
2201  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,true>; // UF replaced by true
2202  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2203 }
2205 //*************************************************************************************************
2206 
2207 
2208 //*************************************************************************************************
// Declares the given sparse matrix/dense matrix multiplication expression as diagonal.
// Throws through BLAZE_THROW_INVALID_ARGUMENT when isSquare( dm ) is false. Otherwise
// returns a new const expression built from the same left and right operands with both
// the lower and the upper flag set to true; the symmetry and Hermitian flags are carried
// over.
// NOTE(review): the listing jumps from 2239 to 2241, so a statement of the original
// source may be missing from this view.
2232 template< typename MT1 // Type of the left-hand side sparse matrix
2233  , typename MT2 // Type of the right-hand side dense matrix
2234  , bool SF // Symmetry flag
2235  , bool HF // Hermitian flag
2236  , bool LF // Lower flag
2237  , bool UF > // Upper flag
2238 inline decltype(auto) decldiag( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2239 {
2241 
2242  if( !isSquare( dm ) ) { // only square expressions are accepted
2243  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
2244  }
2245 
2246  using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,true>; // LF and UF both replaced by true
2247  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2248 }
2250 //*************************************************************************************************
2251 
2252 
2253 
2254 
2255 //=================================================================================================
2256 //
2257 // SIZE SPECIALIZATIONS
2258 //
2259 //=================================================================================================
2260 
2261 //*************************************************************************************************
// Size specializations for SMatDMatMultExpr: the value for index 0UL is inherited from
// the left-hand side operand type MT1, the value for index 1UL from the right-hand side
// operand type MT2. The four structural flags do not influence the result.
// NOTE(review): indices 0UL/1UL presumably denote rows/columns -- confirm against Size.h.
2263 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2264 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2265  : public Size<MT1,0UL> // taken from the left operand
2266 {};
2267 
2268 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2269 struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2270  : public Size<MT2,1UL> // taken from the right operand
2271 {};
2273 //*************************************************************************************************
2274 
2275 
2276 
2277 
2278 //=================================================================================================
2279 //
2280 // ISALIGNED SPECIALIZATIONS
2281 //
2282 //=================================================================================================
2283 
2284 //*************************************************************************************************
// IsAligned specialization for SMatDMatMultExpr: the trait is inherited solely from the
// right-hand side dense matrix operand type MT2; the sparse left-hand side type MT1 and
// the four structural flags are not consulted.
2286 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2287 struct IsAligned< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2288  : public IsAligned<MT2>
2289 {};
2291 //*************************************************************************************************
2292 
2293 } // namespace blaze
2294 
2295 #endif
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:329
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:455
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Header file for the decldiag trait.
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:975
Header file for basic type definitions.
typename If< Condition, T1, T2 >::Type If_t
Auxiliary alias declaration for the If class template.The If_t alias declaration provides a convenien...
Definition: If.h:109
Header file for the declherm trait.
static constexpr bool HERM
Flag for Hermitian matrices.
Definition: SMatDMatMultExpr.h:154
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions.The ResultType_t alias declaration provides ...
Definition: Aliases.h:390
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Base template for the DeclUppTrait class.
Definition: DeclUppTrait.h:134
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:372
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:301
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:591
static constexpr bool smpAssignable
Compilation flag for SMP assignments.
Definition: CompressedMatrix.h:3113
constexpr Unchecked unchecked
Global Unchecked instance.The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Constraint on the data type.
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: SMatDMatMultExpr.h:260
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_t alias declaration provid...
Definition: SIMDTrait.h:315
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:411
Header file for the IsIdentity type trait.
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1002
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for sparse matrix-dense matrix multiplications.The SMatDMatMultExpr class represent...
Definition: Forward.h:115
Header file for the reset shim.
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: SMatDMatMultExpr.h:292
If_t< IsExpression_v< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:268
Constraints on the storage order of matrix types.
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:258
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
OppositeType_t< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:257
constexpr void UNUSED_PARAMETER(const Args &...)
Suppression of unused parameter warnings.
Definition: Unused.h:81
static constexpr bool UPP
Flag for upper matrices.
Definition: SMatDMatMultExpr.h:156
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:80
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:137
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions.The ElementType_t alias declaration provide...
Definition: Aliases.h:170
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:259
ElementType_t< RT1 > ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:136
Constraint on the data type.
Constraint on the data type.
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template.The EnableIf_t alias declaration provides a convenient...
Definition: EnableIf.h:138
LeftOperand leftOperand() const noexcept
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:401
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:423
Header file for the generic max algorithm.
Header file for the DisableIf class template.
typename If_t< HERM, DeclHermTrait< MultTrait_t< RT1, RT2 > >, If_t< SYM, DeclSymTrait< MultTrait_t< RT1, RT2 > >, If_t< LOW, If_t< UPP, DeclDiagTrait< MultTrait_t< RT1, RT2 > >, DeclLowTrait< MultTrait_t< RT1, RT2 > > >, If_t< UPP, DeclUppTrait< MultTrait_t< RT1, RT2 > >, MultTrait< RT1, RT2 > > > > >::Type ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:255
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is not a zero vector or matrix type...
Definition: Zero.h:61
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
CompositeType_t< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:139
Header file for the decllow trait.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:365
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for all SIMD functionality.
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1002
Header file for the IsLower type trait.
ResultType_t< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:135
Header file for the IsAligned type trait.
Generic wrapper for the null function.
Definition: Noop.h:59
Header file for the IsTriangular type trait.
Base template for the DeclSymTrait class.
Definition: DeclSymTrait.h:134
Constraints on the storage order of matrix types.
Header file for the exception macros of the math module.
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1179
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:438
Header file for the DeclDiag functor.
Constraint on the data type.
Header file for all forward declarations for expression class templates.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:316
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the IsPadded type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:103
typename T::OppositeType OppositeType_t
Alias declaration for nested OppositeType type definitions.The OppositeType_t alias declaration provi...
Definition: Aliases.h:270
static constexpr bool evaluateRight
Compilation switch for the composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:149
Header file for the conjugate shim.
Header file for the declupp trait.
ResultType_t< MT1 > RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:134
Header file for the IsSIMDCombinable type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:391
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions.The TransposeType_t alias declaration pro...
Definition: Aliases.h:470
Header file for run time assertion macros.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:381
Base template for the DeclHermTrait class.
Definition: DeclHermTrait.h:134
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.The CompositeType_t alias declaration pro...
Definition: Aliases.h:90
Base template for the MultTrait class.
Definition: MultTrait.h:146
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:462
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:262
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
Header file for the IsZero type trait.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:295
Header file for the declsym trait.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1002
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:435
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:808
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
static constexpr bool SYM
Flag for symmetric matrices.
Definition: SMatDMatMultExpr.h:153
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
If_t< IsExpression_v< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:265
If_t< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:274
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3081
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:445
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: SMatDMatMultExpr.h:286
Base template for the DeclLowTrait class.
Definition: DeclLowTrait.h:134
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1002
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
static constexpr bool evaluateLeft
Compilation switch for the composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:144
static constexpr bool LOW
Flag for lower matrices.
Definition: SMatDMatMultExpr.h:155
CompositeType_t< MT1 > CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:138
Header file for the IntegralConstant class template.
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
If_t< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:271
Header file for the DeclHerm functor.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:261
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:631
Header file for the IsUpper type trait.
ElementType_t< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:137
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1326
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
Base template for the DeclDiagTrait class.
Definition: DeclDiagTrait.h:134
bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:951
Header file for the IsResizable type trait.
Header file for the Size type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is a zero vector or matrix type...
Definition: Zero.h:81
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: SMatDMatMultExpr.h:279
Header file for the IsExpression type trait class.
Header file for the function trace functionality.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:463