TDMatTSMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
52 #include <blaze/math/Exception.h>
65 #include <blaze/math/shims/Reset.h>
67 #include <blaze/math/SIMD.h>
93 #include <blaze/math/views/Check.h>
98 #include <blaze/util/Assert.h>
99 #include <blaze/util/DisableIf.h>
100 #include <blaze/util/EnableIf.h>
103 #include <blaze/util/mpl/If.h>
104 #include <blaze/util/Types.h>
106 
107 
108 namespace blaze {
109 
110 //=================================================================================================
111 //
112 // CLASS TDMATTSMATMULTEXPR
113 //
114 //=================================================================================================
115 
116 //*************************************************************************************************
123 template< typename MT1 // Type of the left-hand side dense matrix
124  , typename MT2 // Type of the right-hand side sparse matrix
125  , bool SF // Symmetry flag
126  , bool HF // Hermitian flag
127  , bool LF // Lower flag
128  , bool UF > // Upper flag
129 class TDMatTSMatMultExpr
130  : public MatMatMultExpr< DenseMatrix< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, true > >
131  , private Computation
132 {
133  private:
134  //**Type definitions****************************************************************************
141  //**********************************************************************************************
142 
143  //**********************************************************************************************
145  static constexpr bool evaluateLeft = ( IsComputation_v<MT1> || RequiresEvaluation_v<MT1> );
146  //**********************************************************************************************
147 
148  //**********************************************************************************************
150  static constexpr bool evaluateRight = ( IsComputation_v<MT2> || RequiresEvaluation_v<MT2> );
151  //**********************************************************************************************
152 
153  //**********************************************************************************************
154  static constexpr bool SYM = ( SF && !( HF || LF || UF ) );
155  static constexpr bool HERM = ( HF && !( LF || UF ) );
156  static constexpr bool LOW = ( LF || ( ( SF || HF ) && UF ) );
157  static constexpr bool UPP = ( UF || ( ( SF || HF ) && LF ) );
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
166  template< typename T1, typename T2, typename T3 >
167  static constexpr bool IsEvaluationRequired_v = ( evaluateLeft || evaluateRight );
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
176  template< typename T1, typename T2, typename T3 >
177  static constexpr bool UseVectorizedKernel_v =
178  ( useOptimizedKernels &&
179  !IsDiagonal_v<T2> &&
180  T1::simdEnabled && T2::simdEnabled &&
181  IsColumnMajorMatrix_v<T1> &&
182  IsSIMDCombinable_v< ElementType_t<T1>
184  , ElementType_t<T3> > &&
185  HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
186  HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
188  //**********************************************************************************************
189 
190  //**********************************************************************************************
192 
196  template< typename T1, typename T2, typename T3 >
197  static constexpr bool UseOptimizedKernel_v =
198  ( useOptimizedKernels &&
199  !UseVectorizedKernel_v<T1,T2,T3> &&
200  !IsDiagonal_v<T2> &&
201  !IsResizable_v< ElementType_t<T1> > &&
202  !IsResizable_v<ET2> );
204  //**********************************************************************************************
205 
206  //**********************************************************************************************
208 
211  template< typename T1, typename T2, typename T3 >
212  static constexpr bool UseDefaultKernel_v =
213  ( !UseVectorizedKernel_v<T1,T2,T3> && !UseOptimizedKernel_v<T1,T2,T3> );
215  //**********************************************************************************************
216 
217  //**********************************************************************************************
219 
222  using ForwardFunctor = If_t< HERM
223  , DeclHerm
224  , If_t< SYM
225  , DeclSym
226  , If_t< LOW
227  , If_t< UPP
228  , DeclDiag
229  , DeclLow >
230  , If_t< UPP
231  , DeclUpp
232  , Noop > > > >;
234  //**********************************************************************************************
235 
236  public:
237  //**Type definitions****************************************************************************
240 
243 
245  using ResultType = typename If_t< HERM
247  , If_t< SYM
249  , If_t< LOW
250  , If_t< UPP
253  , If_t< UPP
255  , MultTrait<RT1,RT2> > > > >::Type;
256 
261  using ReturnType = const ElementType;
262  using CompositeType = const ResultType;
263 
265  using LeftOperand = If_t< IsExpression_v<MT1>, const MT1, const MT1& >;
266 
268  using RightOperand = If_t< IsExpression_v<MT2>, const MT2, const MT2& >;
269 
272 
275  //**********************************************************************************************
276 
277  //**Compilation flags***************************************************************************
279  static constexpr bool simdEnabled =
280  ( !IsDiagonal_v<MT1> &&
281  MT1::simdEnabled &&
282  HasSIMDAdd_v<ET1,ET2> &&
283  HasSIMDMult_v<ET1,ET2> );
284 
286  static constexpr bool smpAssignable =
288  //**********************************************************************************************
289 
290  //**SIMD properties*****************************************************************************
292  static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
293  //**********************************************************************************************
294 
295  //**Constructor*********************************************************************************
301  explicit inline TDMatTSMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
302  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
303  , rhs_( rhs ) // Right-hand side sparse matrix of the multiplication expression
304  {
305  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
306  }
307  //**********************************************************************************************
308 
309  //**Access operator*****************************************************************************
316  inline ReturnType operator()( size_t i, size_t j ) const {
317  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
318  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
319 
320  if( IsDiagonal_v<MT1> ) {
321  return lhs_(i,i) * rhs_(i,j);
322  }
323  else if( IsDiagonal_v<MT2> ) {
324  return lhs_(i,j) * rhs_(j,j);
325  }
326  else if( IsTriangular_v<MT1> || IsTriangular_v<MT2> ) {
327  const size_t begin( ( IsUpper_v<MT1> )
328  ?( ( IsLower_v<MT2> )
329  ?( max( ( IsStrictlyUpper_v<MT1> ? i+1UL : i )
330  , ( IsStrictlyLower_v<MT2> ? j+1UL : j ) ) )
331  :( IsStrictlyUpper_v<MT1> ? i+1UL : i ) )
332  :( ( IsLower_v<MT2> )
333  ?( IsStrictlyLower_v<MT2> ? j+1UL : j )
334  :( 0UL ) ) );
335  const size_t end( ( IsLower_v<MT1> )
336  ?( ( IsUpper_v<MT2> )
337  ?( min( ( IsStrictlyLower_v<MT1> ? i : i+1UL )
338  , ( IsStrictlyUpper_v<MT2> ? j : j+1UL ) ) )
339  :( IsStrictlyLower_v<MT1> ? i : i+1UL ) )
340  :( ( IsUpper_v<MT2> )
341  ?( IsStrictlyUpper_v<MT2> ? j : j+1UL )
342  :( lhs_.columns() ) ) );
343 
344  if( begin >= end ) return ElementType();
345 
346  const size_t n( end - begin );
347 
348  return subvector( row( lhs_, i, unchecked ), begin, n, unchecked ) *
349  subvector( column( rhs_, j, unchecked ), begin, n, unchecked );
350  }
351  else {
352  return row( lhs_, i, unchecked ) * column( rhs_, j, unchecked );
353  }
354  }
355  //**********************************************************************************************
356 
357  //**At function*********************************************************************************
365  inline ReturnType at( size_t i, size_t j ) const {
366  if( i >= lhs_.rows() ) {
367  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
368  }
369  if( j >= rhs_.columns() ) {
370  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
371  }
372  return (*this)(i,j);
373  }
374  //**********************************************************************************************
375 
376  //**Rows function*******************************************************************************
381  inline size_t rows() const noexcept {
382  return lhs_.rows();
383  }
384  //**********************************************************************************************
385 
386  //**Columns function****************************************************************************
391  inline size_t columns() const noexcept {
392  return rhs_.columns();
393  }
394  //**********************************************************************************************
395 
396  //**Left operand access*************************************************************************
401  inline LeftOperand leftOperand() const noexcept {
402  return lhs_;
403  }
404  //**********************************************************************************************
405 
406  //**Right operand access************************************************************************
411  inline RightOperand rightOperand() const noexcept {
412  return rhs_;
413  }
414  //**********************************************************************************************
415 
416  //**********************************************************************************************
422  template< typename T >
423  inline bool canAlias( const T* alias ) const noexcept {
424  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
425  }
426  //**********************************************************************************************
427 
428  //**********************************************************************************************
434  template< typename T >
435  inline bool isAliased( const T* alias ) const noexcept {
436  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
437  }
438  //**********************************************************************************************
439 
440  //**********************************************************************************************
445  inline bool isAligned() const noexcept {
446  return lhs_.isAligned();
447  }
448  //**********************************************************************************************
449 
450  //**********************************************************************************************
455  inline bool canSMPAssign() const noexcept {
456  return ( rows() * columns() >= SMP_TDMATTSMATMULT_THRESHOLD ) && !IsDiagonal_v<MT1>;
457  }
458  //**********************************************************************************************
459 
460  private:
461  //**Member variables****************************************************************************
464  //**********************************************************************************************
465 
466  //**Assignment to dense matrices****************************************************************
479  template< typename MT // Type of the target dense matrix
480  , bool SO > // Storage order of the target dense matrix
481  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
482  {
484 
485  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
486  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
487 
488  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
489  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
490 
491  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
492  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
493  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
494  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
495  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
496  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
497 
498  TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
499  }
501  //**********************************************************************************************
502 
503  //**Default assignment to dense matrices********************************************************
517  template< typename MT3 // Type of the left-hand side target matrix
518  , typename MT4 // Type of the left-hand side matrix operand
519  , typename MT5 > // Type of the right-hand side matrix operand
520  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
522  {
523  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
524 
525  reset( C );
526 
527  for( size_t ii=0UL; ii<A.rows(); ii+=block )
528  {
529  const size_t itmp( min( ii+block, A.rows() ) );
530 
531  for( size_t j=0UL; j<B.columns(); ++j )
532  {
533  auto element( B.begin(j) );
534  const auto end( B.end(j) );
535 
536  for( ; element!=end; ++element )
537  {
538  const size_t j1( element->index() );
539 
540  if( IsDiagonal_v<MT4> )
541  {
542  C(j1,j) = A(j1,j1) * element->value();
543  }
544  else
545  {
546  const size_t ibegin( ( IsLower_v<MT4> )
547  ?( ( LOW )
548  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
549  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
550  :( LOW ? max(j,ii) : ii ) );
551  const size_t iend( ( IsUpper_v<MT4> )
552  ?( ( SYM || HERM || UPP )
553  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
554  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
555  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
556 
557  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
558  continue;
559 
560  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
561 
562  for( size_t i=ibegin; i<iend; ++i ) {
563  if( isDefault( C(i,j) ) )
564  C(i,j) = A(i,j1) * element->value();
565  else
566  C(i,j) += A(i,j1) * element->value();
567  }
568  }
569  }
570  }
571  }
572 
573  if( SYM || HERM ) {
574  for( size_t j=0UL; j<B.columns(); ++j ) {
575  for( size_t i=j+1UL; i<A.rows(); ++i ) {
576  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
577  }
578  }
579  }
580  }
582  //**********************************************************************************************
583 
584  //**Optimized assignment to dense matrices******************************************************
598  template< typename MT3 // Type of the left-hand side target matrix
599  , typename MT4 // Type of the left-hand side matrix operand
600  , typename MT5 > // Type of the right-hand side matrix operand
601  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
602  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
603  {
604  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
605 
606  reset( C );
607 
608  for( size_t ii=0UL; ii<A.rows(); ii+=block )
609  {
610  const size_t itmp( min( ii+block, A.rows() ) );
611 
612  for( size_t j=0UL; j<B.columns(); ++j )
613  {
614  const auto end( B.end(j) );
615  auto element( B.begin(j) );
616 
617  const size_t nonzeros( B.nonZeros(j) );
618  const size_t kpos( nonzeros & size_t(-4) );
619  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
620 
621  for( size_t k=0UL; k<kpos; k+=4UL )
622  {
623  const size_t j1( element->index() );
624  const ET2 v1( element->value() );
625  ++element;
626  const size_t j2( element->index() );
627  const ET2 v2( element->value() );
628  ++element;
629  const size_t j3( element->index() );
630  const ET2 v3( element->value() );
631  ++element;
632  const size_t j4( element->index() );
633  const ET2 v4( element->value() );
634  ++element;
635 
636  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
637 
638  const size_t ibegin( ( IsLower_v<MT4> )
639  ?( ( LOW )
640  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
641  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
642  :( LOW ? max(j,ii) : ii ) );
643  const size_t iend( ( IsUpper_v<MT4> )
644  ?( ( SYM || HERM || UPP )
645  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
646  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
647  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
648 
649  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
650  continue;
651 
652  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
653 
654  const size_t inum( iend - ibegin );
655  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
656  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
657 
658  for( size_t i=ibegin; i<ipos; i+=4UL ) {
659  C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
660  C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
661  C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
662  C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
663  }
664  for( size_t i=ipos; i<iend; ++i ) {
665  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
666  }
667  }
668 
669  for( ; element!=end; ++element )
670  {
671  const size_t j1( element->index() );
672  const ET2 v1( element->value() );
673 
674  const size_t ibegin( ( IsLower_v<MT4> )
675  ?( ( LOW )
676  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
677  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
678  :( LOW ? max(j,ii) : ii ) );
679  const size_t iend( ( IsUpper_v<MT4> )
680  ?( ( SYM || HERM || UPP )
681  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
682  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
683  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
684 
685  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
686  continue;
687 
688  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
689 
690  const size_t inum( iend - ibegin );
691  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
692  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
693 
694  for( size_t i=ibegin; i<ipos; i+=4UL ) {
695  C(i ,j) += A(i ,j1) * v1;
696  C(i+1UL,j) += A(i+1UL,j1) * v1;
697  C(i+2UL,j) += A(i+2UL,j1) * v1;
698  C(i+3UL,j) += A(i+3UL,j1) * v1;
699  }
700  for( size_t i=ipos; i<iend; ++i ) {
701  C(i,j) += A(i,j1) * v1;
702  }
703  }
704  }
705  }
706 
707  if( SYM || HERM ) {
708  for( size_t j=0UL; j<B.columns(); ++j ) {
709  for( size_t i=j+1UL; i<A.rows(); ++i ) {
710  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
711  }
712  }
713  }
714  }
716  //**********************************************************************************************
717 
718  //**Vectorized assignment to column-major dense matrices****************************************
732  template< typename MT3 // Type of the left-hand side target matrix
733  , typename MT4 // Type of the left-hand side matrix operand
734  , typename MT5 > // Type of the right-hand side matrix operand
735  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
736  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
737  {
738  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
739 
740  reset( C );
741 
742  for( size_t j=0UL; j<B.columns(); ++j )
743  {
744  const auto end( B.end(j) );
745  auto element( B.begin(j) );
746 
747  const size_t nonzeros( B.nonZeros(j) );
748  const size_t kpos( nonzeros & size_t(-4) );
749  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
750 
751  for( size_t k=0UL; k<kpos; k+=4UL )
752  {
753  const size_t j1( element->index() );
754  const ET2 v1( element->value() );
755  ++element;
756  const size_t j2( element->index() );
757  const ET2 v2( element->value() );
758  ++element;
759  const size_t j3( element->index() );
760  const ET2 v3( element->value() );
761  ++element;
762  const size_t j4( element->index() );
763  const ET2 v4( element->value() );
764  ++element;
765 
766  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
767 
768  const SIMDType xmm1( set( v1 ) );
769  const SIMDType xmm2( set( v2 ) );
770  const SIMDType xmm3( set( v3 ) );
771  const SIMDType xmm4( set( v4 ) );
772 
773  const size_t ibegin( ( IsLower_v<MT4> )
774  ?( ( IsStrictlyLower_v<MT4> )
775  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
776  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
777  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
778  const size_t iend( ( IsUpper_v<MT4> )
779  ?( ( IsStrictlyUpper_v<MT4> )
780  ?( SYM || HERM || UPP ? max(j+1UL,j4) : j4 )
781  :( SYM || HERM || UPP ? max(j,j4)+1UL : j4+1UL ) )
782  :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
783  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
784 
785  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
786  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
787 
788  size_t i( ibegin );
789 
790  for( ; i<ipos; i+=SIMDSIZE ) {
791  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
792  }
793  for( ; remainder && i<iend; ++i ) {
794  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
795  }
796  }
797 
798  for( ; element!=end; ++element )
799  {
800  const size_t j1( element->index() );
801  const ET2 v1( element->value() );
802 
803  const SIMDType xmm1( set( v1 ) );
804 
805  const size_t ibegin( ( IsLower_v<MT4> )
806  ?( ( IsStrictlyLower_v<MT4> )
807  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
808  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
809  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
810  const size_t iend( ( IsUpper_v<MT4> )
811  ?( ( IsStrictlyUpper_v<MT4> )
812  ?( SYM || HERM || UPP ? max(j+1UL,j1) : j1 )
813  :( SYM || HERM || UPP ? max(j,j1)+1UL : j1+1UL ) )
814  :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
815  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
816 
817  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
818  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
819 
820  size_t i( ibegin );
821 
822  for( ; i<ipos; i+=SIMDSIZE ) {
823  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
824  }
825  for( ; remainder && i<iend; ++i ) {
826  C(i,j) += A(i,j1) * v1;
827  }
828  }
829  }
830 
831  if( SYM || HERM ) {
832  for( size_t j=0UL; j<B.columns(); ++j ) {
833  for( size_t i=j+1UL; i<A.rows(); ++i ) {
834  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
835  }
836  }
837  }
838  }
840  //**********************************************************************************************
841 
842  //**Assignment to sparse matrices***************************************************************
855  template< typename MT // Type of the target sparse matrix
856  , bool SO > // Storage order of the target sparse matrix
857  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
858  {
860 
861  using TmpType = If_t< SO, ResultType, OppositeType >;
862 
869 
870  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
871  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
872 
873  const ForwardFunctor fwd;
874 
875  const TmpType tmp( serial( rhs ) );
876  assign( ~lhs, fwd( tmp ) );
877  }
879  //**********************************************************************************************
880 
881  //**Addition assignment to dense matrices*******************************************************
894  template< typename MT // Type of the target dense matrix
895  , bool SO > // Storage order of the target dense matrix
896  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
897  {
899 
900  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
901  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
902 
903  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
904  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
905 
906  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
907  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
908  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
909  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
910  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
911  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
912 
913  TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
914  }
916  //**********************************************************************************************
917 
918  //**Default addition assignment to dense matrices***********************************************
932  template< typename MT3 // Type of the left-hand side target matrix
933  , typename MT4 // Type of the left-hand side matrix operand
934  , typename MT5 > // Type of the right-hand side matrix operand
935  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
936  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
937  {
938  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
939 
940  for( size_t ii=0UL; ii<A.rows(); ii+=block )
941  {
942  const size_t itmp( min( ii+block, A.rows() ) );
943 
944  for( size_t j=0UL; j<B.columns(); ++j )
945  {
946  auto element( B.begin(j) );
947  const auto end( B.end(j) );
948 
949  for( ; element!=end; ++element )
950  {
951  const size_t j1( element->index() );
952 
953  if( IsDiagonal_v<MT4> )
954  {
955  C(j1,j) += A(j1,j1) * element->value();
956  }
957  else
958  {
959  const size_t ibegin( ( IsLower_v<MT4> )
960  ?( ( LOW )
961  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
962  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
963  :( LOW ? max(j,ii) : ii ) );
964  const size_t iend( ( IsUpper_v<MT4> )
965  ?( ( UPP )
966  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
967  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
968  :( UPP ? min(j+1UL,itmp) : itmp ) );
969 
970  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
971  continue;
972 
973  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
974 
975  const size_t inum( iend - ibegin );
976  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
977  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
978 
979  for( size_t i=ibegin; i<ipos; i+=4UL ) {
980  C(i ,j) += A(i ,j1) * element->value();
981  C(i+1UL,j) += A(i+1UL,j1) * element->value();
982  C(i+2UL,j) += A(i+2UL,j1) * element->value();
983  C(i+3UL,j) += A(i+3UL,j1) * element->value();
984  }
985  for( size_t i=ipos; i<iend; ++i ) {
986  C(i,j) += A(i,j1) * element->value();
987  }
988  }
989  }
990  }
991  }
992  }
994  //**********************************************************************************************
995 
996  //**Optimized addition assignment to dense matrices*********************************************
1010  template< typename MT3 // Type of the left-hand side target matrix
1011  , typename MT4 // Type of the left-hand side matrix operand
1012  , typename MT5 > // Type of the right-hand side matrix operand
1013  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1014  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1015  {
1016  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
1017 
1018  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1019  {
1020  const size_t itmp( min( ii+block, A.rows() ) );
1021 
1022  for( size_t j=0UL; j<B.columns(); ++j )
1023  {
1024  const auto end( B.end(j) );
1025  auto element( B.begin(j) );
1026 
1027  const size_t nonzeros( B.nonZeros(j) );
1028  const size_t kpos( nonzeros & size_t(-4) );
1029  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1030 
1031  for( size_t k=0UL; k<kpos; k+=4UL )
1032  {
1033  const size_t j1( element->index() );
1034  const ET2 v1( element->value() );
1035  ++element;
1036  const size_t j2( element->index() );
1037  const ET2 v2( element->value() );
1038  ++element;
1039  const size_t j3( element->index() );
1040  const ET2 v3( element->value() );
1041  ++element;
1042  const size_t j4( element->index() );
1043  const ET2 v4( element->value() );
1044  ++element;
1045 
1046  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1047 
1048  const size_t ibegin( ( IsLower_v<MT4> )
1049  ?( ( LOW )
1050  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1051  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1052  :( LOW ? max(j,ii) : ii ) );
1053  const size_t iend( ( IsUpper_v<MT4> )
1054  ?( ( UPP )
1055  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
1056  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
1057  :( UPP ? min(j+1UL,itmp) : itmp ) );
1058 
1059  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1060  continue;
1061 
1062  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1063 
1064  const size_t inum( iend - ibegin );
1065  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1066  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1067 
1068  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1069  C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1070  C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1071  C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1072  C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1073  }
1074  for( size_t i=ipos; i<iend; ++i ) {
1075  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1076  }
1077  }
1078 
1079  for( ; element!=end; ++element )
1080  {
1081  const size_t j1( element->index() );
1082  const ET2 v1( element->value() );
1083 
1084  const size_t ibegin( ( IsLower_v<MT4> )
1085  ?( ( LOW )
1086  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1087  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1088  :( LOW ? max(j,ii) : ii ) );
1089  const size_t iend( ( IsUpper_v<MT4> )
1090  ?( ( UPP )
1091  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1092  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1093  :( UPP ? min(j+1UL,itmp) : itmp ) );
1094 
1095  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1096  continue;
1097 
1098  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1099 
1100  const size_t inum( iend - ibegin );
1101  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1102  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1103 
1104  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1105  C(i ,j) += A(i ,j1) * v1;
1106  C(i+1UL,j) += A(i+1UL,j1) * v1;
1107  C(i+2UL,j) += A(i+2UL,j1) * v1;
1108  C(i+3UL,j) += A(i+3UL,j1) * v1;
1109  }
1110  for( size_t i=ipos; i<iend; ++i ) {
1111  C(i,j) += A(i,j1) * v1;
1112  }
1113  }
1114  }
1115  }
1116  }
1118  //**********************************************************************************************
1119 
1120  //**Vectorized addition assignment to column-major dense matrices*******************************
1134  template< typename MT3 // Type of the left-hand side target matrix
1135  , typename MT4 // Type of the left-hand side matrix operand
1136  , typename MT5 > // Type of the right-hand side matrix operand
1137  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1138  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1139  {
1140  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
1141 
1142  for( size_t j=0UL; j<B.columns(); ++j )
1143  {
1144  const auto end( B.end(j) );
1145  auto element( B.begin(j) );
1146 
1147  const size_t nonzeros( B.nonZeros(j) );
1148  const size_t kpos( nonzeros & size_t(-4) );
1149  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1150 
1151  for( size_t k=0UL; k<kpos; k+=4UL )
1152  {
1153  const size_t j1( element->index() );
1154  const ET2 v1( element->value() );
1155  ++element;
1156  const size_t j2( element->index() );
1157  const ET2 v2( element->value() );
1158  ++element;
1159  const size_t j3( element->index() );
1160  const ET2 v3( element->value() );
1161  ++element;
1162  const size_t j4( element->index() );
1163  const ET2 v4( element->value() );
1164  ++element;
1165 
1166  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1167 
1168  const SIMDType xmm1( set( v1 ) );
1169  const SIMDType xmm2( set( v2 ) );
1170  const SIMDType xmm3( set( v3 ) );
1171  const SIMDType xmm4( set( v4 ) );
1172 
1173  const size_t ibegin( ( IsLower_v<MT4> )
1174  ?( ( IsStrictlyLower_v<MT4> )
1175  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1176  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1177  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1178  const size_t iend( ( IsUpper_v<MT4> )
1179  ?( ( IsStrictlyUpper_v<MT4> )
1180  ?( UPP ? max(j+1UL,j4) : j4 )
1181  :( UPP ? max(j,j4)+1UL : j4+1UL ) )
1182  :( UPP ? j+1UL : A.rows() ) );
1183  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1184 
1185  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1186  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1187 
1188  size_t i( ibegin );
1189 
1190  for( ; i<ipos; i+=SIMDSIZE ) {
1191  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1192  }
1193  for( ; remainder && i<iend; ++i ) {
1194  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1195  }
1196  }
1197 
1198  for( ; element!=end; ++element )
1199  {
1200  const size_t j1( element->index() );
1201  const ET2 v1( element->value() );
1202 
1203  const SIMDType xmm1( set( v1 ) );
1204 
1205  const size_t ibegin( ( IsLower_v<MT4> )
1206  ?( ( IsStrictlyLower_v<MT4> )
1207  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1208  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1209  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1210  const size_t iend( ( IsUpper_v<MT4> )
1211  ?( ( IsStrictlyUpper_v<MT4> )
1212  ?( UPP ? max(j+1UL,j1) : j1 )
1213  :( UPP ? max(j,j1)+1UL : j1+1UL ) )
1214  :( UPP ? j+1UL : A.rows() ) );
1215  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1216 
1217  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1218  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1219 
1220  size_t i( ibegin );
1221 
1222  for( ; i<ipos; i+=SIMDSIZE ) {
1223  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
1224  }
1225  for( ; remainder && i<iend; ++i ) {
1226  C(i,j) += A(i,j1) * v1;
1227  }
1228  }
1229  }
1230  }
1232  //**********************************************************************************************
1233 
1234  //**Addition assignment to sparse matrices******************************************************
1235  // No special implementation for the addition assignment to sparse matrices.
1236  //**********************************************************************************************
1237 
1238  //**Subtraction assignment to dense matrices****************************************************
1251  template< typename MT // Type of the target dense matrix
1252  , bool SO > // Storage order of the target dense matrix
1253  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1254  {
1256 
1257  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1258  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1259 
1260  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1261  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
1262 
1263  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1264  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1265  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1266  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1267  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1268  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1269 
1270  TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1271  }
1273  //**********************************************************************************************
1274 
1275  //**Default subtraction assignment to dense matrices********************************************
1289  template< typename MT3 // Type of the left-hand side target matrix
1290  , typename MT4 // Type of the left-hand side matrix operand
1291  , typename MT5 > // Type of the right-hand side matrix operand
1292  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1293  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
1294  {
1295  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
1296 
1297  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1298  {
1299  const size_t itmp( min( ii+block, A.rows() ) );
1300 
1301  for( size_t j=0UL; j<B.columns(); ++j )
1302  {
1303  auto element( B.begin(j) );
1304  const auto end( B.end(j) );
1305 
1306  for( ; element!=end; ++element )
1307  {
1308  const size_t j1( element->index() );
1309 
1310  if( IsDiagonal_v<MT4> )
1311  {
1312  C(j1,j) -= A(j1,j1) * element->value();
1313  }
1314  else
1315  {
1316  const size_t ibegin( ( IsLower_v<MT4> )
1317  ?( ( LOW )
1318  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1319  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1320  :( LOW ? max(j,ii) : ii ) );
1321  const size_t iend( ( IsUpper_v<MT4> )
1322  ?( ( UPP )
1323  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1324  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1325  :( UPP ? min(j+1UL,itmp) : itmp ) );
1326 
1327  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1328  continue;
1329 
1330  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1331 
1332  const size_t inum( iend - ibegin );
1333  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1334  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1335 
1336  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1337  C(i ,j) -= A(i ,j1) * element->value();
1338  C(i+1UL,j) -= A(i+1UL,j1) * element->value();
1339  C(i+2UL,j) -= A(i+2UL,j1) * element->value();
1340  C(i+3UL,j) -= A(i+3UL,j1) * element->value();
1341  }
1342  for( size_t i=ipos; i<iend; ++i ) {
1343  C(i,j) -= A(i,j1) * element->value();
1344  }
1345  }
1346  }
1347  }
1348  }
1349  }
1351  //**********************************************************************************************
1352 
1353  //**Optimized subtraction assignment to dense matrices******************************************
1367  template< typename MT3 // Type of the left-hand side target matrix
1368  , typename MT4 // Type of the left-hand side matrix operand
1369  , typename MT5 > // Type of the right-hand side matrix operand
1370  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1371  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1372  {
1373  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
1374 
1375  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1376  {
1377  const size_t itmp( min( ii+block, A.rows() ) );
1378 
1379  for( size_t j=0UL; j<B.columns(); ++j )
1380  {
1381  const auto end( B.end(j) );
1382  auto element( B.begin(j) );
1383 
1384  const size_t nonzeros( B.nonZeros(j) );
1385  const size_t kpos( nonzeros & size_t(-4) );
1386  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1387 
1388  for( size_t k=0UL; k<kpos; k+=4UL )
1389  {
1390  const size_t j1( element->index() );
1391  const ET2 v1( element->value() );
1392  ++element;
1393  const size_t j2( element->index() );
1394  const ET2 v2( element->value() );
1395  ++element;
1396  const size_t j3( element->index() );
1397  const ET2 v3( element->value() );
1398  ++element;
1399  const size_t j4( element->index() );
1400  const ET2 v4( element->value() );
1401  ++element;
1402 
1403  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1404 
1405  const size_t ibegin( ( IsLower_v<MT4> )
1406  ?( ( LOW )
1407  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1408  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1409  :( LOW ? max(j,ii) : ii ) );
1410  const size_t iend( ( IsUpper_v<MT4> )
1411  ?( ( UPP )
1412  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
1413  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
1414  :( UPP ? min(j+1UL,itmp) : itmp ) );
1415 
1416  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1417  continue;
1418 
1419  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1420 
1421  const size_t inum( iend - ibegin );
1422  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1423  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1424 
1425  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1426  C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1427  C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1428  C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1429  C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1430  }
1431  for( size_t i=ipos; i<iend; ++i ) {
1432  C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1433  }
1434  }
1435 
1436  for( ; element!=end; ++element )
1437  {
1438  const size_t j1( element->index() );
1439  const ET2 v1( element->value() );
1440 
1441  const size_t ibegin( ( IsLower_v<MT4> )
1442  ?( ( LOW )
1443  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1444  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1445  :( LOW ? max(j,ii) : ii ) );
1446  const size_t iend( ( IsUpper_v<MT4> )
1447  ?( ( UPP )
1448  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1449  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1450  :( UPP ? min(j+1UL,itmp) : itmp ) );
1451 
1452  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1453  continue;
1454 
1455  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1456 
1457  const size_t inum( iend - ibegin );
1458  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1459  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1460 
1461  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1462  C(i ,j) -= A(i ,j1) * v1;
1463  C(i+1UL,j) -= A(i+1UL,j1) * v1;
1464  C(i+2UL,j) -= A(i+2UL,j1) * v1;
1465  C(i+3UL,j) -= A(i+3UL,j1) * v1;
1466  }
1467  for( size_t i=ipos; i<iend; ++i ) {
1468  C(i,j) -= A(i,j1) * v1;
1469  }
1470  }
1471  }
1472  }
1473  }
1475  //**********************************************************************************************
1476 
1477  //**Vectorized subtraction assignment to column-major dense matrices****************************
1491  template< typename MT3 // Type of the left-hand side target matrix
1492  , typename MT4 // Type of the left-hand side matrix operand
1493  , typename MT5 > // Type of the right-hand side matrix operand
1494  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1495  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1496  {
1497  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
1498 
1499  for( size_t j=0UL; j<B.columns(); ++j )
1500  {
1501  const auto end( B.end(j) );
1502  auto element( B.begin(j) );
1503 
1504  const size_t nonzeros( B.nonZeros(j) );
1505  const size_t kpos( nonzeros & size_t(-4) );
1506  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1507 
1508  for( size_t k=0UL; k<kpos; k+=4UL )
1509  {
1510  const size_t j1( element->index() );
1511  const ET2 v1( element->value() );
1512  ++element;
1513  const size_t j2( element->index() );
1514  const ET2 v2( element->value() );
1515  ++element;
1516  const size_t j3( element->index() );
1517  const ET2 v3( element->value() );
1518  ++element;
1519  const size_t j4( element->index() );
1520  const ET2 v4( element->value() );
1521  ++element;
1522 
1523  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1524 
1525  const SIMDType xmm1( set( v1 ) );
1526  const SIMDType xmm2( set( v2 ) );
1527  const SIMDType xmm3( set( v3 ) );
1528  const SIMDType xmm4( set( v4 ) );
1529 
1530  const size_t ibegin( ( IsLower_v<MT4> )
1531  ?( ( IsStrictlyLower_v<MT4> )
1532  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1533  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1534  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1535  const size_t iend( ( IsUpper_v<MT4> )
1536  ?( ( IsStrictlyUpper_v<MT4> )
1537  ?( UPP ? max(j+1UL,j4) : j4 )
1538  :( UPP ? max(j,j4)+1UL : j4+1UL ) )
1539  :( UPP ? j+1UL : A.rows() ) );
1540  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1541 
1542  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1543  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1544 
1545  size_t i( ibegin );
1546 
1547  for( ; i<ipos; i+=SIMDSIZE ) {
1548  C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1549  }
1550  for( ; remainder && i<iend; ++i ) {
1551  C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1552  }
1553  }
1554 
1555  for( ; element!=end; ++element )
1556  {
1557  const size_t j1( element->index() );
1558  const ET2 v1( element->value() );
1559 
1560  const SIMDType xmm1( set( v1 ) );
1561 
1562  const size_t ibegin( ( IsLower_v<MT4> )
1563  ?( ( IsStrictlyLower_v<MT4> )
1564  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1565  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1566  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1567  const size_t iend( ( IsUpper_v<MT4> )
1568  ?( ( IsStrictlyUpper_v<MT4> )
1569  ?( UPP ? max(j+1UL,j1) : j1 )
1570  :( UPP ? max(j,j1)+1UL : j1+1UL ) )
1571  :( UPP ? j+1UL : A.rows() ) );
1572  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1573 
1574  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1575  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1576 
1577  size_t i( ibegin );
1578 
1579  for( ; i<ipos; i+=SIMDSIZE ) {
1580  C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 );
1581  }
1582  for( ; remainder && i<iend; ++i ) {
1583  C(i,j) -= A(i,j1) * v1;
1584  }
1585  }
1586  }
1587  }
1589  //**********************************************************************************************
1590 
1591  //**Subtraction assignment to sparse matrices***************************************************
1592  // No special implementation for the subtraction assignment to sparse matrices.
1593  //**********************************************************************************************
1594 
1595  //**Schur product assignment to dense matrices**************************************************
1608  template< typename MT // Type of the target dense matrix
1609  , bool SO > // Storage order of the target dense matrix
1610  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1611  {
1613 
1617 
1618  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1619  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1620 
1621  const ResultType tmp( serial( rhs ) );
1622  schurAssign( ~lhs, tmp );
1623  }
1625  //**********************************************************************************************
1626 
1627  //**Schur product assignment to sparse matrices*************************************************
1628  // No special implementation for the Schur product assignment to sparse matrices.
1629  //**********************************************************************************************
1630 
1631  //**Multiplication assignment to dense matrices*************************************************
1632  // No special implementation for the multiplication assignment to dense matrices.
1633  //**********************************************************************************************
1634 
1635  //**Multiplication assignment to sparse matrices************************************************
1636  // No special implementation for the multiplication assignment to sparse matrices.
1637  //**********************************************************************************************
1638 
1639  //**SMP assignment to dense matrices************************************************************
1654  template< typename MT // Type of the target dense matrix
1655  , bool SO > // Storage order of the target dense matrix
1656  friend inline auto smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1657  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1658  {
1660 
1661  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1662  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1663 
1664  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1665  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1666 
1667  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1668  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1669  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1670  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1671  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1672  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1673 
1674  smpAssign( ~lhs, A * B );
1675  }
1677  //**********************************************************************************************
1678 
1679  //**SMP assignment to sparse matrices***********************************************************
1694  template< typename MT // Type of the target sparse matrix
1695  , bool SO > // Storage order of the target sparse matrix
1696  friend inline auto smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1697  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1698  {
1700 
1701  using TmpType = If_t< SO, ResultType, OppositeType >;
1702 
1709 
1710  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1711  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1712 
1713  const ForwardFunctor fwd;
1714 
1715  const TmpType tmp( rhs );
1716  smpAssign( ~lhs, fwd( tmp ) );
1717  }
1719  //**********************************************************************************************
1720 
1721  //**SMP addition assignment to dense matrices***************************************************
1736  template< typename MT // Type of the target dense matrix
1737  , bool SO > // Storage order of the target dense matrix
1738  friend inline auto smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1739  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1740  {
1742 
1743  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1744  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1745 
1746  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1747  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1748 
1749  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1750  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1751  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1752  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1753  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1754  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1755 
1756  smpAddAssign( ~lhs, A * B );
1757  }
1759  //**********************************************************************************************
1760 
1761  //**SMP addition assignment to sparse matrices**************************************************
1762  // No special implementation for the SMP addition assignment to sparse matrices.
1763  //**********************************************************************************************
1764 
1765  //**SMP subtraction assignment to dense matrices************************************************
1780  template< typename MT // Type of the target dense matrix
1781  , bool SO > // Storage order of the target dense matrix
1782  friend inline auto smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1783  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1784  {
1786 
1787  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1788  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1789 
1790  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1791  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1792 
1793  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1794  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1795  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1796  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1797  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1798  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1799 
1800  smpSubAssign( ~lhs, A * B );
1801  }
1803  //**********************************************************************************************
1804 
1805  //**SMP subtraction assignment to sparse matrices***********************************************
1806  // No special implementation for the SMP subtraction assignment to sparse matrices.
1807  //**********************************************************************************************
1808 
1809  //**SMP Schur product assignment to dense matrices**********************************************
1822  template< typename MT // Type of the target dense matrix
1823  , bool SO > // Storage order of the target dense matrix
1824  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1825  {
1827 
1831 
1832  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1833  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1834 
1835  const ResultType tmp( rhs );
1836  smpSchurAssign( ~lhs, tmp );
1837  }
1839  //**********************************************************************************************
1840 
1841  //**SMP Schur product assignment to sparse matrices*********************************************
1842  // No special implementation for the SMP Schur product assignment to sparse matrices.
1843  //**********************************************************************************************
1844 
1845  //**SMP multiplication assignment to dense matrices*********************************************
1846  // No special implementation for the SMP multiplication assignment to dense matrices.
1847  //**********************************************************************************************
1848 
1849  //**SMP multiplication assignment to sparse matrices********************************************
1850  // No special implementation for the SMP multiplication assignment to sparse matrices.
1851  //**********************************************************************************************
1852 
1853  //**Compile time checks*************************************************************************
1862  //**********************************************************************************************
1863 };
1864 //*************************************************************************************************
1865 
1866 
1867 
1868 
1869 //=================================================================================================
1870 //
1871 // GLOBAL BINARY ARITHMETIC OPERATORS
1872 //
1873 //=================================================================================================
1874 
1875 //*************************************************************************************************
1888 template< typename MT1 // Type of the left-hand side dense matrix
1889  , typename MT2 // Type of the right-hand side sparse matrix
1890  , DisableIf_t< ( IsIdentity_v<MT2> &&
1891  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > ) ||
1892  IsZero_v<MT2> >* = nullptr >
1893 inline const TDMatTSMatMultExpr<MT1,MT2,false,false,false,false>
1894  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1895 {
1897 
1898  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1899 
1900  return TDMatTSMatMultExpr<MT1,MT2,false,false,false,false>( ~lhs, ~rhs );
1901 }
1903 //*************************************************************************************************
1904 
1905 
1906 //*************************************************************************************************
1920 template< typename MT1 // Type of the left-hand side dense matrix
1921  , typename MT2 // Type of the right-hand side sparse matrix
1922  , EnableIf_t< IsIdentity_v<MT2> &&
1923  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > >* = nullptr >
1924 inline const MT1&
1925  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1926 {
1928 
1929  UNUSED_PARAMETER( rhs );
1930 
1931  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1932 
1933  return (~lhs);
1934 }
1936 //*************************************************************************************************
1937 
1938 
1939 //*************************************************************************************************
1952 template< typename MT1 // Type of the left-hand side dense matrix
1953  , typename MT2 // Type of the right-hand side sparse matrix
1954  , EnableIf_t< IsZero_v<MT2> >* = nullptr >
1955 inline decltype(auto)
1956  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1957 {
1959 
1960  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1961 
1962  using ReturnType = const MultTrait_t< ResultType_t<MT1>, ResultType_t<MT2> >;
1963 
1966 
1967  return ReturnType( (~lhs).rows(), (~rhs).columns() );
1968 }
1970 //*************************************************************************************************
1971 
1972 
1973 //*************************************************************************************************
2002 template< typename MT1 // Type of the left-hand side dense matrix
2003  , typename MT2 > // Type of the right-hand side sparse matrix
2004 inline decltype(auto)
2005  operator*( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
2006 {
2008 
2009  if( (~lhs).columns() != (~rhs).rows() ) {
2010  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
2011  }
2012 
2013  return tdmattsmatmult( ~lhs, ~rhs );
2014 }
2015 //*************************************************************************************************
2016 
2017 
2018 
2019 
2020 //=================================================================================================
2021 //
2022 // GLOBAL FUNCTIONS
2023 //
2024 //=================================================================================================
2025 
2026 //*************************************************************************************************
2050 template< typename MT1 // Type of the left-hand side dense matrix
2051  , typename MT2 // Type of the right-hand side dense matrix
2052  , bool SF // Symmetry flag
2053  , bool HF // Hermitian flag
2054  , bool LF // Lower flag
2055  , bool UF > // Upper flag
2056 inline decltype(auto) declsym( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2057 {
2059 
2060  if( !isSquare( dm ) ) {
2061  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
2062  }
2063 
2064  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,true,HF,LF,UF>;
2065  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2066 }
2068 //*************************************************************************************************
2069 
2070 
2071 //*************************************************************************************************
2095 template< typename MT1 // Type of the left-hand side dense matrix
2096  , typename MT2 // Type of the right-hand side dense matrix
2097  , bool SF // Symmetry flag
2098  , bool HF // Hermitian flag
2099  , bool LF // Lower flag
2100  , bool UF > // Upper flag
2101 inline decltype(auto) declherm( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2102 {
2104 
2105  if( !isSquare( dm ) ) {
2106  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
2107  }
2108 
2109  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,true,LF,UF>;
2110  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2111 }
2113 //*************************************************************************************************
2114 
2115 
2116 //*************************************************************************************************
2140 template< typename MT1 // Type of the left-hand side dense matrix
2141  , typename MT2 // Type of the right-hand side dense matrix
2142  , bool SF // Symmetry flag
2143  , bool HF // Hermitian flag
2144  , bool LF // Lower flag
2145  , bool UF > // Upper flag
2146 inline decltype(auto) decllow( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2147 {
2149 
2150  if( !isSquare( dm ) ) {
2151  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
2152  }
2153 
2154  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,true,UF>;
2155  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2156 }
2158 //*************************************************************************************************
2159 
2160 
2161 //*************************************************************************************************
2185 template< typename MT1 // Type of the left-hand side dense matrix
2186  , typename MT2 // Type of the right-hand side dense matrix
2187  , bool SF // Symmetry flag
2188  , bool HF // Hermitian flag
2189  , bool LF // Lower flag
2190  , bool UF > // Upper flag
2191 inline decltype(auto) declupp( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2192 {
2194 
2195  if( !isSquare( dm ) ) {
2196  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
2197  }
2198 
2199  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,true>;
2200  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2201 }
2203 //*************************************************************************************************
2204 
2205 
2206 //*************************************************************************************************
2230 template< typename MT1 // Type of the left-hand side dense matrix
2231  , typename MT2 // Type of the right-hand side dense matrix
2232  , bool SF // Symmetry flag
2233  , bool HF // Hermitian flag
2234  , bool LF // Lower flag
2235  , bool UF > // Upper flag
2236 inline decltype(auto) decldiag( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2237 {
2239 
2240  if( !isSquare( dm ) ) {
2241  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
2242  }
2243 
2244  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,true,true>;
2245  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2246 }
2248 //*************************************************************************************************
2249 
2250 
2251 
2252 
2253 //=================================================================================================
2254 //
2255 // SIZE SPECIALIZATIONS
2256 //
2257 //=================================================================================================
2258 
2259 //*************************************************************************************************
2261 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2262 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2263  : public Size<MT1,0UL>
2264 {};
2265 
2266 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2267 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2268  : public Size<MT2,1UL>
2269 {};
2271 //*************************************************************************************************
2272 
2273 
2274 
2275 
2276 //=================================================================================================
2277 //
2278 // ISALIGNED SPECIALIZATIONS
2279 //
2280 //=================================================================================================
2281 
2282 //*************************************************************************************************
2284 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2285 struct IsAligned< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2286  : public IsAligned<MT1>
2287 {};
2289 //*************************************************************************************************
2290 
2291 } // namespace blaze
2292 
2293 #endif
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:329
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
static constexpr bool evaluateRight
Compilation switch for the composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:150
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Headerfile for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Header file for the decldiag trait.
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:463
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:411
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:975
CompositeType_t< MT2 > CT2
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:140
Header file for basic type definitions.
ResultType_t< MT2 > RT2
Result type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:136
typename If< Condition, T1, T2 >::Type If_t
Auxiliary alias declaration for the If class template.The If_t alias declaration provides a convenien...
Definition: If.h:109
ElementType_t< RT2 > ET2
Element type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:138
Header file for the declherm trait.
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: TDMatTSMatMultExpr.h:259
Expression object for transpose dense matrix-transpose sparse matrix multiplications.The TDMatTSMatMultExpr class represents the compile time expression for multiplications between a column-major dense matrix and a column-major sparse matrix.
Definition: Forward.h:164
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTSMatMultExpr.h:401
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions.The ResultType_t alias declaration provides ...
Definition: Aliases.h:390
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Base template for the DeclUppTrait class.
Definition: DeclUppTrait.h:134
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:372
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTSMatMultExpr.h:435
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:591
static constexpr bool smpAssignable
Compilation flag for SMP assignments.
Definition: CompressedMatrix.h:3113
static constexpr bool SYM
Flag for symmetric matrices.
Definition: TDMatTSMatMultExpr.h:154
constexpr Unchecked unchecked
Global Unchecked instance.The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Constraint on the data type.
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_t alias declaration provid...
Definition: SIMDTrait.h:315
ElementType_t< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:137
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatTSMatMultExpr.h:423
Header file for the IsIdentity type trait.
typename If_t< HERM, DeclHermTrait< MultTrait_t< RT1, RT2 > >, If_t< SYM, DeclSymTrait< MultTrait_t< RT1, RT2 > >, If_t< LOW, If_t< UPP, DeclDiagTrait< MultTrait_t< RT1, RT2 > >, DeclLowTrait< MultTrait_t< RT1, RT2 > > >, If_t< UPP, DeclUppTrait< MultTrait_t< RT1, RT2 > >, MultTrait< RT1, RT2 > > > > >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:255
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1002
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Header file for the reset shim.
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
constexpr void UNUSED_PARAMETER(const Args &...)
Suppression of unused parameter warnings.
Definition: Unused.h:81
static constexpr bool LOW
Flag for lower matrices.
Definition: TDMatTSMatMultExpr.h:156
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:80
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:137
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions.The ElementType_t alias declaration provide...
Definition: Aliases.h:170
Constraint on the data type.
Constraint on the data type.
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template.The EnableIf_t alias declaration provides a convenient...
Definition: EnableIf.h:138
Headerfile for the generic max algorithm.
Header file for the DisableIf class template.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: TDMatTSMatMultExpr.h:391
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: TDMatTSMatMultExpr.h:381
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is not a zero vector or matrix type...
Definition: Zero.h:61
TDMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the TDMatTSMatMultExpr class.
Definition: TDMatTSMatMultExpr.h:301
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1147
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:261
Header file for the decllow trait.
static constexpr bool evaluateLeft
Compilation switch for the composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:145
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
ResultType_t< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:135
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatTSMatMultExpr.h:260
Header file for all SIMD functionality.
static constexpr bool UPP
Flag for upper matrices.
Definition: TDMatTSMatMultExpr.h:157
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1002
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Generic wrapper for the null function.
Definition: Noop.h:59
Header file for the IsTriangular type trait.
Base template for the DeclSymTrait class.
Definition: DeclSymTrait.h:134
Constraints on the storage order of matrix types.
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: TDMatTSMatMultExpr.h:279
Header file for the exception macros of the math module.
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1179
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:438
Header file for the DeclDiag functor.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:316
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: TDMatTSMatMultExpr.h:286
Constraint on the data type.
Header file for all forward declarations for expression class templates.
CompositeType_t< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:139
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the IsPadded type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:103
typename T::OppositeType OppositeType_t
Alias declaration for nested OppositeType type definitions.The OppositeType_t alias declaration provi...
Definition: Aliases.h:270
Header file for the conjugate shim.
Header file for the declupp trait.
Header file for the IsSIMDCombinable type trait.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
If_t< IsExpression_v< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:268
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions.The TransposeType_t alias declaration pro...
Definition: Aliases.h:470
Header file for run time assertion macros.
Base template for the DeclHermTrait class.
Definition: DeclHermTrait.h:134
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.The CompositeType_t alias declaration pro...
Definition: Aliases.h:90
Base template for the MultTrait class.
Definition: MultTrait.h:146
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:258
Header file for the IsZero type trait.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:295
Header file for the declsym trait.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1002
Header file for the isDefault shim.
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:808
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: TDMatTSMatMultExpr.h:292
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3081
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:365
Base template for the DeclLowTrait class.
Definition: DeclLowTrait.h:134
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1002
If_t< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTSMatMultExpr.h:271
If_t< IsExpression_v< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:265
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
Header file for the IntegralConstant class template.
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:462
If_t< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:274
Header file for the DeclHerm functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTSMatMultExpr.h:455
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:631
Header file for the IsUpper type trait.
static constexpr bool HERM
Flag for Hermitian matrices.
Definition: TDMatTSMatMultExpr.h:155
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1326
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
Base template for the DeclDiagTrait class.
Definition: DeclDiagTrait.h:134
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTSMatMultExpr.h:262
bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:951
Header file for the IsResizable type trait.
OppositeType_t< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:257
Header file for the Size type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is a zero vector or matrix type...
Definition: Zero.h:81
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a sparse, N-dimensional matrix type...
Definition: SparseMatrix.h:61
Header file for the IsExpression type trait class.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTSMatMultExpr.h:445
Header file for the function trace functionality.