Blaze  3.6
TDMatTSMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTSMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
52 #include <blaze/math/Exception.h>
65 #include <blaze/math/shims/Reset.h>
67 #include <blaze/math/SIMD.h>
93 #include <blaze/math/views/Check.h>
98 #include <blaze/util/Assert.h>
99 #include <blaze/util/DisableIf.h>
100 #include <blaze/util/EnableIf.h>
103 #include <blaze/util/MaybeUnused.h>
104 #include <blaze/util/mpl/If.h>
105 #include <blaze/util/Types.h>
107 
108 
109 namespace blaze {
110 
111 //=================================================================================================
112 //
113 // CLASS TDMATTSMATMULTEXPR
114 //
115 //=================================================================================================
116 
117 //*************************************************************************************************
124 template< typename MT1 // Type of the left-hand side dense matrix
125  , typename MT2 // Type of the right-hand side sparse matrix
126  , bool SF // Symmetry flag
127  , bool HF // Hermitian flag
128  , bool LF // Lower flag
129  , bool UF > // Upper flag
130 class TDMatTSMatMultExpr
131  : public MatMatMultExpr< DenseMatrix< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, true > >
132  , private Computation
133 {
134  private:
135  //**Type definitions****************************************************************************
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146  static constexpr bool evaluateLeft = ( IsComputation_v<MT1> || RequiresEvaluation_v<MT1> );
147  //**********************************************************************************************
148 
149  //**********************************************************************************************
151  static constexpr bool evaluateRight = ( IsComputation_v<MT2> || RequiresEvaluation_v<MT2> );
152  //**********************************************************************************************
153 
154  //**********************************************************************************************
155  static constexpr bool SYM = ( SF && !( HF || LF || UF ) );
156  static constexpr bool HERM = ( HF && !( LF || UF ) );
157  static constexpr bool LOW = ( LF || ( ( SF || HF ) && UF ) );
158  static constexpr bool UPP = ( UF || ( ( SF || HF ) && LF ) );
159  //**********************************************************************************************
160 
161  //**********************************************************************************************
163 
167  template< typename T1, typename T2, typename T3 >
168  static constexpr bool IsEvaluationRequired_v = ( evaluateLeft || evaluateRight );
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
177  template< typename T1, typename T2, typename T3 >
178  static constexpr bool UseVectorizedKernel_v =
179  ( useOptimizedKernels &&
180  !IsDiagonal_v<T2> &&
181  T1::simdEnabled && T2::simdEnabled &&
182  IsColumnMajorMatrix_v<T1> &&
183  IsSIMDCombinable_v< ElementType_t<T1>
185  , ElementType_t<T3> > &&
186  HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
187  HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
197  template< typename T1, typename T2, typename T3 >
198  static constexpr bool UseOptimizedKernel_v =
199  ( useOptimizedKernels &&
200  !UseVectorizedKernel_v<T1,T2,T3> &&
201  !IsDiagonal_v<T2> &&
202  !IsResizable_v< ElementType_t<T1> > &&
203  !IsResizable_v<ET2> );
205  //**********************************************************************************************
206 
207  //**********************************************************************************************
209 
212  template< typename T1, typename T2, typename T3 >
213  static constexpr bool UseDefaultKernel_v =
214  ( !UseVectorizedKernel_v<T1,T2,T3> && !UseOptimizedKernel_v<T1,T2,T3> );
216  //**********************************************************************************************
217 
218  //**********************************************************************************************
220 
223  using ForwardFunctor = If_t< HERM
224  , DeclHerm
225  , If_t< SYM
226  , DeclSym
227  , If_t< LOW
228  , If_t< UPP
229  , DeclDiag
230  , DeclLow >
231  , If_t< UPP
232  , DeclUpp
233  , Noop > > > >;
235  //**********************************************************************************************
236 
237  public:
238  //**Type definitions****************************************************************************
241 
244 
// Result type of the expression, selected through nested If_t on the HERM, SYM, LOW and UPP
// flags; the last visible alternative is MultTrait<RT1,RT2>.
// NOTE(review): the listing numbers jump (247, 249, 252, 253 and 255 are absent), so the other
// alternatives of this selection are not visible here -- verify against the complete header.
246  using ResultType = typename If_t< HERM
248  , If_t< SYM
250  , If_t< LOW
251  , If_t< UPP
254  , If_t< UPP
256  , MultTrait<RT1,RT2> > > > >::Type;
257 
// Type returned by operator() and at(): a const-qualified ElementType.
262  using ReturnType = const ElementType;
// Const-qualified ResultType.
263  using CompositeType = const ResultType;
264 
// Type returned by leftOperand(): const MT1 by value if MT1 is an expression, otherwise a
// reference to const MT1.
266  using LeftOperand = If_t< IsExpression_v<MT1>, const MT1, const MT1& >;
267 
// Type returned by rightOperand(): const MT2 by value if MT2 is an expression, otherwise a
// reference to const MT2.
269  using RightOperand = If_t< IsExpression_v<MT2>, const MT2, const MT2& >;
270 
273 
276  //**********************************************************************************************
277 
278  //**Compilation flags***************************************************************************
280  static constexpr bool simdEnabled =
281  ( !IsDiagonal_v<MT1> &&
282  MT1::simdEnabled &&
283  HasSIMDAdd_v<ET1,ET2> &&
284  HasSIMDMult_v<ET1,ET2> );
285 
287  static constexpr bool smpAssignable =
288  ( !evaluateLeft && MT1::smpAssignable && !evaluateRight && MT2::smpAssignable );
289  //**********************************************************************************************
290 
291  //**SIMD properties*****************************************************************************
// Value of SIMDTrait<ElementType>::size; the vectorized kernels use it as the step width of
// their SIMD loops and for masking loop bounds.
293  static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
294  //**********************************************************************************************
295 
296  //**Constructor*********************************************************************************
302  explicit inline TDMatTSMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
303  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
304  , rhs_( rhs ) // Right-hand side sparse matrix of the multiplication expression
305  {
306  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
307  }
308  //**********************************************************************************************
309 
310  //**Access operator*****************************************************************************
// 2D-access to element (i,j) of the product. The indices are only checked by internal
// assertions: i against the rows of the left operand, j against the columns of the right one.
317  inline ReturnType operator()( size_t i, size_t j ) const {
318  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
319  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
320 
// Diagonal left operand: only lhs_(i,i) contributes to row i.
321  if( IsDiagonal_v<MT1> ) {
322  return lhs_(i,i) * rhs_(i,j);
323  }
// Diagonal right operand: only rhs_(j,j) contributes to column j.
324  else if( IsDiagonal_v<MT2> ) {
325  return lhs_(i,j) * rhs_(j,j);
326  }
// At least one triangular operand: restrict the inner index to the range [begin,end)
// derived from the upper/lower (and strictly upper/lower) properties of both operands.
327  else if( IsTriangular_v<MT1> || IsTriangular_v<MT2> ) {
328  const size_t begin( ( IsUpper_v<MT1> )
329  ?( ( IsLower_v<MT2> )
330  ?( max( ( IsStrictlyUpper_v<MT1> ? i+1UL : i )
331  , ( IsStrictlyLower_v<MT2> ? j+1UL : j ) ) )
332  :( IsStrictlyUpper_v<MT1> ? i+1UL : i ) )
333  :( ( IsLower_v<MT2> )
334  ?( IsStrictlyLower_v<MT2> ? j+1UL : j )
335  :( 0UL ) ) );
336  const size_t end( ( IsLower_v<MT1> )
337  ?( ( IsUpper_v<MT2> )
338  ?( min( ( IsStrictlyLower_v<MT1> ? i : i+1UL )
339  , ( IsStrictlyUpper_v<MT2> ? j : j+1UL ) ) )
340  :( IsStrictlyLower_v<MT1> ? i : i+1UL ) )
341  :( ( IsUpper_v<MT2> )
342  ?( IsStrictlyUpper_v<MT2> ? j : j+1UL )
343  :( lhs_.columns() ) ) );
344 
// Empty range: the result is a default-constructed element.
345  if( begin >= end ) return ElementType();
346 
347  const size_t n( end - begin );
348 
// Product of the n-element sub-row of lhs_ and sub-column of rhs_ starting at 'begin'.
349  return subvector( row( lhs_, i, unchecked ), begin, n, unchecked ) *
350  subvector( column( rhs_, j, unchecked ), begin, n, unchecked );
351  }
// General case: product of row i of lhs_ and column j of rhs_.
352  else {
353  return row( lhs_, i, unchecked ) * column( rhs_, j, unchecked );
354  }
355  }
356  //**********************************************************************************************
357 
358  //**At function*********************************************************************************
366  inline ReturnType at( size_t i, size_t j ) const {
367  if( i >= lhs_.rows() ) {
368  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
369  }
370  if( j >= rhs_.columns() ) {
371  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
372  }
373  return (*this)(i,j);
374  }
375  //**********************************************************************************************
376 
377  //**Rows function*******************************************************************************
382  inline size_t rows() const noexcept {
383  return lhs_.rows();
384  }
385  //**********************************************************************************************
386 
387  //**Columns function****************************************************************************
392  inline size_t columns() const noexcept {
393  return rhs_.columns();
394  }
395  //**********************************************************************************************
396 
397  //**Left operand access*************************************************************************
402  inline LeftOperand leftOperand() const noexcept {
403  return lhs_;
404  }
405  //**********************************************************************************************
406 
407  //**Right operand access************************************************************************
412  inline RightOperand rightOperand() const noexcept {
413  return rhs_;
414  }
415  //**********************************************************************************************
416 
417  //**********************************************************************************************
423  template< typename T >
424  inline bool canAlias( const T* alias ) const noexcept {
425  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
426  }
427  //**********************************************************************************************
428 
429  //**********************************************************************************************
435  template< typename T >
436  inline bool isAliased( const T* alias ) const noexcept {
437  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
438  }
439  //**********************************************************************************************
440 
441  //**********************************************************************************************
446  inline bool isAligned() const noexcept {
447  return lhs_.isAligned();
448  }
449  //**********************************************************************************************
450 
451  //**********************************************************************************************
456  inline bool canSMPAssign() const noexcept {
457  return ( rows() * columns() >= SMP_TDMATTSMATMULT_THRESHOLD ) && !IsDiagonal_v<MT1>;
458  }
459  //**********************************************************************************************
460 
461  private:
462  //**Member variables****************************************************************************
465  //**********************************************************************************************
466 
467  //**Assignment to dense matrices****************************************************************
480  template< typename MT // Type of the target dense matrix
481  , bool SO > // Storage order of the target dense matrix
482  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
483  {
485 
486  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
487  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
488 
489  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
490  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
491 
492  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
493  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
494  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
495  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
496  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
497  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
498 
499  TDMatTSMatMultExpr::selectAssignKernel( ~lhs, A, B );
500  }
502  //**********************************************************************************************
503 
504  //**Default assignment to dense matrices********************************************************
518  template< typename MT3 // Type of the left-hand side target matrix
519  , typename MT4 // Type of the left-hand side matrix operand
520  , typename MT5 > // Type of the right-hand side matrix operand
521  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
523  {
524  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
525 
526  reset( C );
527 
528  for( size_t ii=0UL; ii<A.rows(); ii+=block )
529  {
530  const size_t itmp( min( ii+block, A.rows() ) );
531 
532  for( size_t j=0UL; j<B.columns(); ++j )
533  {
534  auto element( B.begin(j) );
535  const auto end( B.end(j) );
536 
537  for( ; element!=end; ++element )
538  {
539  const size_t j1( element->index() );
540 
541  if( IsDiagonal_v<MT4> )
542  {
543  C(j1,j) = A(j1,j1) * element->value();
544  }
545  else
546  {
547  const size_t ibegin( ( IsLower_v<MT4> )
548  ?( ( LOW )
549  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
550  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
551  :( LOW ? max(j,ii) : ii ) );
552  const size_t iend( ( IsUpper_v<MT4> )
553  ?( ( SYM || HERM || UPP )
554  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
555  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
556  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
557 
558  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
559  continue;
560 
561  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
562 
563  for( size_t i=ibegin; i<iend; ++i ) {
564  if( isDefault( C(i,j) ) )
565  C(i,j) = A(i,j1) * element->value();
566  else
567  C(i,j) += A(i,j1) * element->value();
568  }
569  }
570  }
571  }
572  }
573 
574  if( SYM || HERM ) {
575  for( size_t j=0UL; j<B.columns(); ++j ) {
576  for( size_t i=j+1UL; i<A.rows(); ++i ) {
577  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
578  }
579  }
580  }
581  }
583  //**********************************************************************************************
584 
585  //**Optimized assignment to dense matrices******************************************************
// Optimized assignment kernel (C=A*B), enabled when UseOptimizedKernel_v holds for the
// argument types. C is the target dense matrix, A the left-hand side dense operand, B the
// right-hand side sparse operand. C is reset, then the non-zero elements of each column of B
// are consumed four at a time (with a single-element remainder loop), and the row loop over C
// is additionally unrolled by four.
599  template< typename MT3 // Type of the left-hand side target matrix
600  , typename MT4 // Type of the left-hand side matrix operand
601  , typename MT5 > // Type of the right-hand side matrix operand
602  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
603  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
604  {
// Row block size: all rows at once for a column-major target, 64 rows otherwise.
605  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
606 
607  reset( C );
608 
609  for( size_t ii=0UL; ii<A.rows(); ii+=block )
610  {
// End of the current row block, clamped to the number of rows of A.
611  const size_t itmp( min( ii+block, A.rows() ) );
612 
613  for( size_t j=0UL; j<B.columns(); ++j )
614  {
615  const auto end( B.end(j) );
616  auto element( B.begin(j) );
617 
// kpos: number of non-zeros in column j rounded down to a multiple of four.
618  const size_t nonzeros( B.nonZeros(j) );
619  const size_t kpos( nonzeros & size_t(-4) );
620  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
621 
// Main loop: fetch index/value of four consecutive non-zeros of column j.
622  for( size_t k=0UL; k<kpos; k+=4UL )
623  {
624  const size_t j1( element->index() );
625  const ET2 v1( element->value() );
626  ++element;
627  const size_t j2( element->index() );
628  const ET2 v2( element->value() );
629  ++element;
630  const size_t j3( element->index() );
631  const ET2 v3( element->value() );
632  ++element;
633  const size_t j4( element->index() );
634  const ET2 v4( element->value() );
635  ++element;
636 
637  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
638 
// Row range for the group: the lower bound uses the smallest index j1, the upper bound
// the largest index j4; both are also limited by the block and the structure flags.
639  const size_t ibegin( ( IsLower_v<MT4> )
640  ?( ( LOW )
641  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
642  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
643  :( LOW ? max(j,ii) : ii ) );
644  const size_t iend( ( IsUpper_v<MT4> )
645  ?( ( SYM || HERM || UPP )
646  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
647  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
648  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
649 
650  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
651  continue;
652 
653  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
654 
// ipos: end of the 4-way unrolled row loop (ibegin plus the row count rounded down to a
// multiple of four); the rows in [ipos,iend) are handled one by one.
655  const size_t inum( iend - ibegin );
656  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
657  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
658 
659  for( size_t i=ibegin; i<ipos; i+=4UL ) {
660  C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
661  C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
662  C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
663  C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
664  }
665  for( size_t i=ipos; i<iend; ++i ) {
666  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
667  }
668  }
669 
// Remainder loop: the non-zeros of column j left over by the 4-element groups.
670  for( ; element!=end; ++element )
671  {
672  const size_t j1( element->index() );
673  const ET2 v1( element->value() );
674 
675  const size_t ibegin( ( IsLower_v<MT4> )
676  ?( ( LOW )
677  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
678  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
679  :( LOW ? max(j,ii) : ii ) );
680  const size_t iend( ( IsUpper_v<MT4> )
681  ?( ( SYM || HERM || UPP )
682  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
683  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
684  :( SYM || HERM || UPP ? min(j+1UL,itmp) : itmp ) );
685 
686  if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
687  continue;
688 
689  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
690 
691  const size_t inum( iend - ibegin );
692  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
693  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
694 
695  for( size_t i=ibegin; i<ipos; i+=4UL ) {
696  C(i ,j) += A(i ,j1) * v1;
697  C(i+1UL,j) += A(i+1UL,j1) * v1;
698  C(i+2UL,j) += A(i+2UL,j1) * v1;
699  C(i+3UL,j) += A(i+3UL,j1) * v1;
700  }
701  for( size_t i=ipos; i<iend; ++i ) {
702  C(i,j) += A(i,j1) * v1;
703  }
704  }
705  }
706  }
707 
// Symmetric/Hermitian result: every element below the diagonal is set from its mirrored
// counterpart C(j,i), conjugated in the Hermitian case.
708  if( SYM || HERM ) {
709  for( size_t j=0UL; j<B.columns(); ++j ) {
710  for( size_t i=j+1UL; i<A.rows(); ++i ) {
711  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
712  }
713  }
714  }
715  }
717  //**********************************************************************************************
718 
719  //**Vectorized assignment to column-major dense matrices****************************************
// Vectorized assignment kernel (C=A*B), enabled when UseVectorizedKernel_v holds for the
// argument types. C is the target dense matrix, A the left-hand side dense operand, B the
// right-hand side sparse operand. C is reset, then for each column j of B the non-zero
// elements are consumed four at a time (plus a single-element remainder loop) and column j of
// C is updated in steps of SIMDSIZE rows via load()/store().
733  template< typename MT3 // Type of the left-hand side target matrix
734  , typename MT4 // Type of the left-hand side matrix operand
735  , typename MT5 > // Type of the right-hand side matrix operand
736  static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
737  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
738  {
// A scalar remainder loop is required unless both C and A are padded.
739  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
740 
741  reset( C );
742 
743  for( size_t j=0UL; j<B.columns(); ++j )
744  {
745  const auto end( B.end(j) );
746  auto element( B.begin(j) );
747 
// kpos: number of non-zeros in column j rounded down to a multiple of four.
748  const size_t nonzeros( B.nonZeros(j) );
749  const size_t kpos( nonzeros & size_t(-4) );
750  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
751 
// Main loop: fetch index/value of four consecutive non-zeros of column j.
752  for( size_t k=0UL; k<kpos; k+=4UL )
753  {
754  const size_t j1( element->index() );
755  const ET2 v1( element->value() );
756  ++element;
757  const size_t j2( element->index() );
758  const ET2 v2( element->value() );
759  ++element;
760  const size_t j3( element->index() );
761  const ET2 v3( element->value() );
762  ++element;
763  const size_t j4( element->index() );
764  const ET2 v4( element->value() );
765  ++element;
766 
767  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
768 
// SIMD values built from the four scalars via set() (presumably a broadcast -- confirm
// against the SIMD documentation).
769  const SIMDType xmm1( set( v1 ) );
770  const SIMDType xmm2( set( v2 ) );
771  const SIMDType xmm3( set( v3 ) );
772  const SIMDType xmm4( set( v4 ) );
773 
// Row range for the group. The lower bound is masked with size_t(-SIMDSIZE), i.e. rounded
// down to a multiple of SIMDSIZE (assuming SIMDSIZE is a power of two); the upper bound is
// derived from the largest index j4.
774  const size_t ibegin( ( IsLower_v<MT4> )
775  ?( ( IsStrictlyLower_v<MT4> )
776  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
777  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
778  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
779  const size_t iend( ( IsUpper_v<MT4> )
780  ?( ( IsStrictlyUpper_v<MT4> )
781  ?( SYM || HERM || UPP ? max(j+1UL,j4) : j4 )
782  :( SYM || HERM || UPP ? max(j,j4)+1UL : j4+1UL ) )
783  :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
784  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
785 
// ipos: end of the SIMD loop -- iend masked down to a multiple of SIMDSIZE when a remainder
// loop exists, iend itself otherwise.
786  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
787  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
788 
789  size_t i( ibegin );
790 
791  for( ; i<ipos; i+=SIMDSIZE ) {
792  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
793  }
// Scalar tail for the rows in [ipos,iend); skipped entirely when 'remainder' is false.
794  for( ; remainder && i<iend; ++i ) {
795  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
796  }
797  }
798 
// Remainder loop: the non-zeros of column j left over by the 4-element groups.
799  for( ; element!=end; ++element )
800  {
801  const size_t j1( element->index() );
802  const ET2 v1( element->value() );
803 
804  const SIMDType xmm1( set( v1 ) );
805 
806  const size_t ibegin( ( IsLower_v<MT4> )
807  ?( ( IsStrictlyLower_v<MT4> )
808  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
809  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
810  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
811  const size_t iend( ( IsUpper_v<MT4> )
812  ?( ( IsStrictlyUpper_v<MT4> )
813  ?( SYM || HERM || UPP ? max(j+1UL,j1) : j1 )
814  :( SYM || HERM || UPP ? max(j,j1)+1UL : j1+1UL ) )
815  :( SYM || HERM || UPP ? j+1UL : A.rows() ) );
816  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
817 
818  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
819  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
820 
821  size_t i( ibegin );
822 
823  for( ; i<ipos; i+=SIMDSIZE ) {
824  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
825  }
826  for( ; remainder && i<iend; ++i ) {
827  C(i,j) += A(i,j1) * v1;
828  }
829  }
830  }
831 
// Symmetric/Hermitian result: every element below the diagonal is set from its mirrored
// counterpart C(j,i), conjugated in the Hermitian case.
832  if( SYM || HERM ) {
833  for( size_t j=0UL; j<B.columns(); ++j ) {
834  for( size_t i=j+1UL; i<A.rows(); ++i ) {
835  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
836  }
837  }
838  }
839  }
841  //**********************************************************************************************
842 
843  //**Assignment to sparse matrices***************************************************************
856  template< typename MT // Type of the target sparse matrix
857  , bool SO > // Storage order of the target sparse matrix
858  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
859  {
861 
862  using TmpType = If_t< SO, ResultType, OppositeType >;
863 
870 
871  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
872  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
873 
874  const ForwardFunctor fwd;
875 
876  const TmpType tmp( serial( rhs ) );
877  assign( ~lhs, fwd( tmp ) );
878  }
880  //**********************************************************************************************
881 
882  //**Addition assignment to dense matrices*******************************************************
895  template< typename MT // Type of the target dense matrix
896  , bool SO > // Storage order of the target dense matrix
897  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
898  {
900 
901  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
902  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
903 
904  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
905  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
906 
907  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
908  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
909  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
910  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
911  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
912  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
913 
914  TDMatTSMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
915  }
917  //**********************************************************************************************
918 
919  //**Default addition assignment to dense matrices***********************************************
// Default addition assignment kernel (C+=A*B), enabled when UseDefaultKernel_v holds for the
// argument types. C is the target dense matrix, A the left-hand side dense operand, B the
// right-hand side sparse operand. Unlike the plain assignment kernels, C is not reset and no
// mirroring step follows; every contribution is added to the existing elements of C.
933  template< typename MT3 // Type of the left-hand side target matrix
934  , typename MT4 // Type of the left-hand side matrix operand
935  , typename MT5 > // Type of the right-hand side matrix operand
936  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
937  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
938  {
// Row block size: all rows at once for a column-major target or a diagonal A, 64 otherwise.
939  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
940 
941  for( size_t ii=0UL; ii<A.rows(); ii+=block )
942  {
// End of the current row block, clamped to the number of rows of A.
943  const size_t itmp( min( ii+block, A.rows() ) );
944 
945  for( size_t j=0UL; j<B.columns(); ++j )
946  {
947  auto element( B.begin(j) );
948  const auto end( B.end(j) );
949 
950  for( ; element!=end; ++element )
951  {
// Index of the current non-zero within column j of B (selects a column of A).
952  const size_t j1( element->index() );
953 
// Diagonal A: only A(j1,j1) contributes, and only to C(j1,j).
954  if( IsDiagonal_v<MT4> )
955  {
956  C(j1,j) += A(j1,j1) * element->value();
957  }
958  else
959  {
// Row range [ibegin,iend) limited by the block, the structure of A and the LOW/UPP flags.
960  const size_t ibegin( ( IsLower_v<MT4> )
961  ?( ( LOW )
962  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
963  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
964  :( LOW ? max(j,ii) : ii ) );
965  const size_t iend( ( IsUpper_v<MT4> )
966  ?( ( UPP )
967  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
968  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
969  :( UPP ? min(j+1UL,itmp) : itmp ) );
970 
971  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
972  continue;
973 
974  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
975 
// ipos: end of the 4-way unrolled row loop; the rows in [ipos,iend) are handled one by one.
976  const size_t inum( iend - ibegin );
977  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
978  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
979 
980  for( size_t i=ibegin; i<ipos; i+=4UL ) {
981  C(i ,j) += A(i ,j1) * element->value();
982  C(i+1UL,j) += A(i+1UL,j1) * element->value();
983  C(i+2UL,j) += A(i+2UL,j1) * element->value();
984  C(i+3UL,j) += A(i+3UL,j1) * element->value();
985  }
986  for( size_t i=ipos; i<iend; ++i ) {
987  C(i,j) += A(i,j1) * element->value();
988  }
989  }
990  }
991  }
992  }
993  }
995  //**********************************************************************************************
996 
997  //**Optimized addition assignment to dense matrices*********************************************
// Optimized addition assignment kernel (C+=A*B), enabled when UseOptimizedKernel_v holds for
// the argument types. C is the target dense matrix, A the left-hand side dense operand, B the
// right-hand side sparse operand. The non-zero elements of each column of B are consumed four
// at a time (with a single-element remainder loop) and the row loop over C is unrolled by
// four. C is not reset and no mirroring step follows.
1011  template< typename MT3 // Type of the left-hand side target matrix
1012  , typename MT4 // Type of the left-hand side matrix operand
1013  , typename MT5 > // Type of the right-hand side matrix operand
1014  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1015  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1016  {
// Row block size: all rows at once for a column-major target, 64 rows otherwise.
1017  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
1018 
1019  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1020  {
// End of the current row block, clamped to the number of rows of A.
1021  const size_t itmp( min( ii+block, A.rows() ) );
1022 
1023  for( size_t j=0UL; j<B.columns(); ++j )
1024  {
1025  const auto end( B.end(j) );
1026  auto element( B.begin(j) );
1027 
// kpos: number of non-zeros in column j rounded down to a multiple of four.
1028  const size_t nonzeros( B.nonZeros(j) );
1029  const size_t kpos( nonzeros & size_t(-4) );
1030  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1031 
// Main loop: fetch index/value of four consecutive non-zeros of column j.
1032  for( size_t k=0UL; k<kpos; k+=4UL )
1033  {
1034  const size_t j1( element->index() );
1035  const ET2 v1( element->value() );
1036  ++element;
1037  const size_t j2( element->index() );
1038  const ET2 v2( element->value() );
1039  ++element;
1040  const size_t j3( element->index() );
1041  const ET2 v3( element->value() );
1042  ++element;
1043  const size_t j4( element->index() );
1044  const ET2 v4( element->value() );
1045  ++element;
1046 
1047  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1048 
// Row range for the group: the lower bound uses the smallest index j1, the upper bound
// the largest index j4; both are also limited by the block and the LOW/UPP flags.
1049  const size_t ibegin( ( IsLower_v<MT4> )
1050  ?( ( LOW )
1051  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1052  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1053  :( LOW ? max(j,ii) : ii ) );
1054  const size_t iend( ( IsUpper_v<MT4> )
1055  ?( ( UPP )
1056  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
1057  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
1058  :( UPP ? min(j+1UL,itmp) : itmp ) );
1059 
1060  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1061  continue;
1062 
1063  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1064 
// ipos: end of the 4-way unrolled row loop; the rows in [ipos,iend) are handled one by one.
1065  const size_t inum( iend - ibegin );
1066  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1067  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1068 
1069  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1070  C(i ,j) += A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1071  C(i+1UL,j) += A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1072  C(i+2UL,j) += A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1073  C(i+3UL,j) += A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1074  }
1075  for( size_t i=ipos; i<iend; ++i ) {
1076  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1077  }
1078  }
1079 
// Remainder loop: the non-zeros of column j left over by the 4-element groups.
1080  for( ; element!=end; ++element )
1081  {
1082  const size_t j1( element->index() );
1083  const ET2 v1( element->value() );
1084 
1085  const size_t ibegin( ( IsLower_v<MT4> )
1086  ?( ( LOW )
1087  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1088  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1089  :( LOW ? max(j,ii) : ii ) );
1090  const size_t iend( ( IsUpper_v<MT4> )
1091  ?( ( UPP )
1092  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1093  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1094  :( UPP ? min(j+1UL,itmp) : itmp ) );
1095 
1096  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1097  continue;
1098 
1099  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1100 
1101  const size_t inum( iend - ibegin );
1102  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1103  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1104 
1105  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1106  C(i ,j) += A(i ,j1) * v1;
1107  C(i+1UL,j) += A(i+1UL,j1) * v1;
1108  C(i+2UL,j) += A(i+2UL,j1) * v1;
1109  C(i+3UL,j) += A(i+3UL,j1) * v1;
1110  }
1111  for( size_t i=ipos; i<iend; ++i ) {
1112  C(i,j) += A(i,j1) * v1;
1113  }
1114  }
1115  }
1116  }
1117  }
1119  //**********************************************************************************************
1120 
1121  //**Vectorized addition assignment to column-major dense matrices*******************************
  // Vectorized kernel for the addition assignment C += A * B (per the section header above, the
  // target is a column-major dense matrix). The overload participates in overload resolution
  // only if UseVectorizedKernel_v<MT3,MT4,MT5> is true.
  //
  // \param C The target left-hand side matrix; updated in place.
  // \param A The left-hand side multiplication operand (accessed via load() and operator()).
  // \param B The right-hand side multiplication operand (traversed column-wise via begin(j)/end(j)).
  //
  // For every column j of B the non-zero elements are consumed in groups of four, followed by a
  // one-at-a-time loop for the leftover elements. Each (group of) element(s) updates the row range
  // [ibegin,iend) of column j of C, first with SIMD loads/stores and then, if required, with a
  // scalar tail loop.
  //
  // NOTE(review): LOW, UPP, ET2, SIMDType and SIMDSIZE are declared outside this block. LOW/UPP
  // presumably mark a lower/upper result and set() presumably broadcasts a scalar into all SIMD
  // lanes -- confirm against the class definition.
1135  template< typename MT3 // Type of the left-hand side target matrix
1136  , typename MT4 // Type of the left-hand side matrix operand
1137  , typename MT5 > // Type of the right-hand side matrix operand
1138  static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1139  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1140  {
  // A scalar tail loop is only required if C or A is not padded
1141  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
1142 
1143  for( size_t j=0UL; j<B.columns(); ++j )
1144  {
1145  const auto end( B.end(j) );
1146  auto element( B.begin(j) );
1147 
  // kpos: number of non-zero elements in column j, rounded down to a multiple of four
1148  const size_t nonzeros( B.nonZeros(j) );
1149  const size_t kpos( nonzeros & size_t(-4) );
1150  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1151 
  // Main loop: four non-zero elements of column j per iteration
1152  for( size_t k=0UL; k<kpos; k+=4UL )
1153  {
  // Fetch index/value of four consecutive elements; the iterator is advanced past all four
1154  const size_t j1( element->index() );
1155  const ET2 v1( element->value() );
1156  ++element;
1157  const size_t j2( element->index() );
1158  const ET2 v2( element->value() );
1159  ++element;
1160  const size_t j3( element->index() );
1161  const ET2 v3( element->value() );
1162  ++element;
1163  const size_t j4( element->index() );
1164  const ET2 v4( element->value() );
1165  ++element;
1166 
  // The bounds below rely on strictly ascending indices: j1 is the smallest, j4 the largest
1167  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1168 
  // SIMD counterparts of the four scalar values
1169  const SIMDType xmm1( set( v1 ) );
1170  const SIMDType xmm2( set( v2 ) );
1171  const SIMDType xmm3( set( v3 ) );
1172  const SIMDType xmm4( set( v4 ) );
1173 
  // First row to update. For a (strictly) lower A the start derives from j1 (j1+1), optionally
  // raised to j if LOW is set; in every non-zero case the value is rounded down to a multiple
  // of SIMDSIZE.
1174  const size_t ibegin( ( IsLower_v<MT4> )
1175  ?( ( IsStrictlyLower_v<MT4> )
1176  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1177  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1178  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
  // One past the last row to update. For a (strictly) upper A the end derives from the largest
  // index j4; otherwise it is j+1 if UPP is set or the number of rows of A.
1179  const size_t iend( ( IsUpper_v<MT4> )
1180  ?( ( IsStrictlyUpper_v<MT4> )
1181  ?( UPP ? max(j+1UL,j4) : j4 )
1182  :( UPP ? max(j,j4)+1UL : j4+1UL ) )
1183  :( UPP ? j+1UL : A.rows() ) );
1184  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1185 
  // End of the SIMD loop: iend rounded down to a multiple of SIMDSIZE if a scalar tail is
  // required, otherwise iend itself
1186  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1187  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1188 
1189  size_t i( ibegin );
1190 
  // SIMD part: SIMDSIZE rows of column j per iteration
1191  for( ; i<ipos; i+=SIMDSIZE ) {
1192  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1193  }
  // Scalar tail: continues at the row where the SIMD loop stopped (only if remainder is true)
1194  for( ; remainder && i<iend; ++i ) {
1195  C(i,j) += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1196  }
1197  }
1198 
  // Leftover loop: the remaining (fewer than four) non-zero elements of column j, one at a time
1199  for( ; element!=end; ++element )
1200  {
1201  const size_t j1( element->index() );
1202  const ET2 v1( element->value() );
1203 
1204  const SIMDType xmm1( set( v1 ) );
1205 
  // Same row bounds as in the main loop, with j1 serving as both smallest and largest index
1206  const size_t ibegin( ( IsLower_v<MT4> )
1207  ?( ( IsStrictlyLower_v<MT4> )
1208  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1209  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1210  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1211  const size_t iend( ( IsUpper_v<MT4> )
1212  ?( ( IsStrictlyUpper_v<MT4> )
1213  ?( UPP ? max(j+1UL,j1) : j1 )
1214  :( UPP ? max(j,j1)+1UL : j1+1UL ) )
1215  :( UPP ? j+1UL : A.rows() ) );
1216  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1217 
1218  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1219  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1220 
1221  size_t i( ibegin );
1222 
  // SIMD part followed by the optional scalar tail
1223  for( ; i<ipos; i+=SIMDSIZE ) {
1224  C.store( i, j, C.load(i,j) + A.load(i,j1) * xmm1 );
1225  }
1226  for( ; remainder && i<iend; ++i ) {
1227  C(i,j) += A(i,j1) * v1;
1228  }
1229  }
1230  }
1231  }
1233  //**********************************************************************************************
1234 
1235  //**Addition assignment to sparse matrices******************************************************
1236  // No special implementation for the addition assignment to sparse matrices.
1237  //**********************************************************************************************
1238 
1239  //**Subtraction assignment to dense matrices****************************************************
1252  template< typename MT // Type of the target dense matrix
1253  , bool SO > // Storage order of the target dense matrix
1254  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1255  {
1257 
1258  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1259  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1260 
1261  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1262  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side sparse matrix operand
1263 
1264  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1265  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1266  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1267  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1268  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1269  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1270 
1271  TDMatTSMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1272  }
1274  //**********************************************************************************************
1275 
1276  //**Default subtraction assignment to dense matrices********************************************
1290  template< typename MT3 // Type of the left-hand side target matrix
1291  , typename MT4 // Type of the left-hand side matrix operand
1292  , typename MT5 > // Type of the right-hand side matrix operand
1293  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1294  -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
1295  {
1296  const size_t block( IsColumnMajorMatrix_v<MT3> || IsDiagonal_v<MT4> ? A.rows() : 64UL );
1297 
1298  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1299  {
1300  const size_t itmp( min( ii+block, A.rows() ) );
1301 
1302  for( size_t j=0UL; j<B.columns(); ++j )
1303  {
1304  auto element( B.begin(j) );
1305  const auto end( B.end(j) );
1306 
1307  for( ; element!=end; ++element )
1308  {
1309  const size_t j1( element->index() );
1310 
1311  if( IsDiagonal_v<MT4> )
1312  {
1313  C(j1,j) -= A(j1,j1) * element->value();
1314  }
1315  else
1316  {
1317  const size_t ibegin( ( IsLower_v<MT4> )
1318  ?( ( LOW )
1319  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1320  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1321  :( LOW ? max(j,ii) : ii ) );
1322  const size_t iend( ( IsUpper_v<MT4> )
1323  ?( ( UPP )
1324  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1325  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1326  :( UPP ? min(j+1UL,itmp) : itmp ) );
1327 
1328  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1329  continue;
1330 
1331  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1332 
1333  const size_t inum( iend - ibegin );
1334  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1335  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1336 
1337  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1338  C(i ,j) -= A(i ,j1) * element->value();
1339  C(i+1UL,j) -= A(i+1UL,j1) * element->value();
1340  C(i+2UL,j) -= A(i+2UL,j1) * element->value();
1341  C(i+3UL,j) -= A(i+3UL,j1) * element->value();
1342  }
1343  for( size_t i=ipos; i<iend; ++i ) {
1344  C(i,j) -= A(i,j1) * element->value();
1345  }
1346  }
1347  }
1348  }
1349  }
1350  }
1352  //**********************************************************************************************
1353 
1354  //**Optimized subtraction assignment to dense matrices******************************************
  // Optimized kernel for the subtraction assignment C -= A * B. The overload participates in
  // overload resolution only if UseOptimizedKernel_v<MT3,MT4,MT5> is true.
  //
  // \param C The target left-hand side matrix; updated in place.
  // \param A The left-hand side multiplication operand.
  // \param B The right-hand side multiplication operand (traversed column-wise via begin(j)/end(j)).
  //
  // The rows of A are processed in blocks. Within a block the non-zero elements of column j of B
  // are consumed in groups of four (followed by a one-at-a-time loop for the leftovers), and each
  // group updates the row range [ibegin,iend) of column j of C with a four-way unrolled loop.
  //
  // NOTE(review): LOW, UPP and ET2 are declared outside this block; LOW/UPP presumably mark a
  // lower/upper result -- confirm against the class definition.
1368  template< typename MT3 // Type of the left-hand side target matrix
1369  , typename MT4 // Type of the left-hand side matrix operand
1370  , typename MT5 > // Type of the right-hand side matrix operand
1371  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1372  -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1373  {
  // Row-block size: all rows at once for column-major targets, otherwise blocks of 64 rows
1374  const size_t block( IsColumnMajorMatrix_v<MT3> ? A.rows() : 64UL );
1375 
1376  for( size_t ii=0UL; ii<A.rows(); ii+=block )
1377  {
  // One past the last row of the current block
1378  const size_t itmp( min( ii+block, A.rows() ) );
1379 
1380  for( size_t j=0UL; j<B.columns(); ++j )
1381  {
1382  const auto end( B.end(j) );
1383  auto element( B.begin(j) );
1384 
  // kpos: number of non-zero elements in column j, rounded down to a multiple of four
1385  const size_t nonzeros( B.nonZeros(j) );
1386  const size_t kpos( nonzeros & size_t(-4) );
1387  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1388 
  // Main loop: four non-zero elements of column j per iteration
1389  for( size_t k=0UL; k<kpos; k+=4UL )
1390  {
  // Fetch index/value of four consecutive elements; the iterator is advanced past all four
  // before the range check below, so the 'continue' cannot skip the advance
1391  const size_t j1( element->index() );
1392  const ET2 v1( element->value() );
1393  ++element;
1394  const size_t j2( element->index() );
1395  const ET2 v2( element->value() );
1396  ++element;
1397  const size_t j3( element->index() );
1398  const ET2 v3( element->value() );
1399  ++element;
1400  const size_t j4( element->index() );
1401  const ET2 v4( element->value() );
1402  ++element;
1403 
  // The bounds below rely on strictly ascending indices: j1 is the smallest, j4 the largest
1404  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1405 
  // First row to update within the current block [ii,itmp): never below ii, raised to j if
  // LOW is set and to j1 (j1+1 if strictly lower) if A is lower
1406  const size_t ibegin( ( IsLower_v<MT4> )
1407  ?( ( LOW )
1408  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1409  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1410  :( LOW ? max(j,ii) : ii ) );
  // One past the last row to update: never beyond itmp, lowered to j+1 if UPP is set and to
  // j4+1 (j4 if strictly upper) if A is upper
1411  const size_t iend( ( IsUpper_v<MT4> )
1412  ?( ( UPP )
1413  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) )
1414  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j4 : j4+1UL ) ) ) )
1415  :( UPP ? min(j+1UL,itmp) : itmp ) );
1416 
  // Skip groups whose row range is empty within the current block
1417  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1418  continue;
1419 
1420  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1421 
  // ipos: end of the four-way unrolled part of the row range
1422  const size_t inum( iend - ibegin );
1423  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1424  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1425 
  // Four rows per iteration; each row subtracts the sum of the four products
1426  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1427  C(i ,j) -= A(i ,j1) * v1 + A(i ,j2) * v2 + A(i ,j3) * v3 + A(i ,j4) * v4;
1428  C(i+1UL,j) -= A(i+1UL,j1) * v1 + A(i+1UL,j2) * v2 + A(i+1UL,j3) * v3 + A(i+1UL,j4) * v4;
1429  C(i+2UL,j) -= A(i+2UL,j1) * v1 + A(i+2UL,j2) * v2 + A(i+2UL,j3) * v3 + A(i+2UL,j4) * v4;
1430  C(i+3UL,j) -= A(i+3UL,j1) * v1 + A(i+3UL,j2) * v2 + A(i+3UL,j3) * v3 + A(i+3UL,j4) * v4;
1431  }
  // Remaining rows (fewer than four)
1432  for( size_t i=ipos; i<iend; ++i ) {
1433  C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1434  }
1435  }
1436 
  // Leftover loop: the remaining (fewer than four) non-zero elements of column j, one at a time
1437  for( ; element!=end; ++element )
1438  {
1439  const size_t j1( element->index() );
1440  const ET2 v1( element->value() );
1441 
  // Same row bounds as in the main loop, with j1 serving as both smallest and largest index
1442  const size_t ibegin( ( IsLower_v<MT4> )
1443  ?( ( LOW )
1444  ?( max( j, ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) )
1445  :( max( ii, ( IsStrictlyLower_v<MT4> ? j1+1UL : j1 ) ) ) )
1446  :( LOW ? max(j,ii) : ii ) );
1447  const size_t iend( ( IsUpper_v<MT4> )
1448  ?( ( UPP )
1449  ?( min( j+1UL, itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) )
1450  :( min( itmp, ( IsStrictlyUpper_v<MT4> ? j1 : j1+1UL ) ) ) )
1451  :( UPP ? min(j+1UL,itmp) : itmp ) );
1452 
1453  if( ( LOW || UPP || IsTriangular_v<MT4> ) && ( ibegin >= iend ) )
1454  continue;
1455 
1456  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1457 
1458  const size_t inum( iend - ibegin );
1459  const size_t ipos( ibegin + ( inum & size_t(-4) ) );
1460  BLAZE_INTERNAL_ASSERT( ( ibegin + inum - ( inum % 4UL ) ) == ipos, "Invalid end calculation" );
1461 
  // Four-way unrolled update followed by the remaining rows
1462  for( size_t i=ibegin; i<ipos; i+=4UL ) {
1463  C(i ,j) -= A(i ,j1) * v1;
1464  C(i+1UL,j) -= A(i+1UL,j1) * v1;
1465  C(i+2UL,j) -= A(i+2UL,j1) * v1;
1466  C(i+3UL,j) -= A(i+3UL,j1) * v1;
1467  }
1468  for( size_t i=ipos; i<iend; ++i ) {
1469  C(i,j) -= A(i,j1) * v1;
1470  }
1471  }
1472  }
1473  }
1474  }
1476  //**********************************************************************************************
1477 
1478  //**Vectorized subtraction assignment to column-major dense matrices****************************
  // Vectorized kernel for the subtraction assignment C -= A * B (per the section header above,
  // the target is a column-major dense matrix). The overload participates in overload resolution
  // only if UseVectorizedKernel_v<MT3,MT4,MT5> is true.
  //
  // \param C The target left-hand side matrix; updated in place.
  // \param A The left-hand side multiplication operand (accessed via load() and operator()).
  // \param B The right-hand side multiplication operand (traversed column-wise via begin(j)/end(j)).
  //
  // For every column j of B the non-zero elements are consumed in groups of four, followed by a
  // one-at-a-time loop for the leftover elements. Each (group of) element(s) updates the row range
  // [ibegin,iend) of column j of C, first with SIMD loads/stores and then, if required, with a
  // scalar tail loop.
  //
  // NOTE(review): LOW, UPP, ET2, SIMDType and SIMDSIZE are declared outside this block. LOW/UPP
  // presumably mark a lower/upper result and set() presumably broadcasts a scalar into all SIMD
  // lanes -- confirm against the class definition.
1492  template< typename MT3 // Type of the left-hand side target matrix
1493  , typename MT4 // Type of the left-hand side matrix operand
1494  , typename MT5 > // Type of the right-hand side matrix operand
1495  static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1496  -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1497  {
  // A scalar tail loop is only required if C or A is not padded
1498  constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT4> );
1499 
1500  for( size_t j=0UL; j<B.columns(); ++j )
1501  {
1502  const auto end( B.end(j) );
1503  auto element( B.begin(j) );
1504 
  // kpos: number of non-zero elements in column j, rounded down to a multiple of four
1505  const size_t nonzeros( B.nonZeros(j) );
1506  const size_t kpos( nonzeros & size_t(-4) );
1507  BLAZE_INTERNAL_ASSERT( ( nonzeros - ( nonzeros % 4UL ) ) == kpos, "Invalid end calculation" );
1508 
  // Main loop: four non-zero elements of column j per iteration
1509  for( size_t k=0UL; k<kpos; k+=4UL )
1510  {
  // Fetch index/value of four consecutive elements; the iterator is advanced past all four
1511  const size_t j1( element->index() );
1512  const ET2 v1( element->value() );
1513  ++element;
1514  const size_t j2( element->index() );
1515  const ET2 v2( element->value() );
1516  ++element;
1517  const size_t j3( element->index() );
1518  const ET2 v3( element->value() );
1519  ++element;
1520  const size_t j4( element->index() );
1521  const ET2 v4( element->value() );
1522  ++element;
1523 
  // The bounds below rely on strictly ascending indices: j1 is the smallest, j4 the largest
1524  BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse matrix index detected" );
1525 
  // SIMD counterparts of the four scalar values
1526  const SIMDType xmm1( set( v1 ) );
1527  const SIMDType xmm2( set( v2 ) );
1528  const SIMDType xmm3( set( v3 ) );
1529  const SIMDType xmm4( set( v4 ) );
1530 
  // First row to update. For a (strictly) lower A the start derives from j1 (j1+1), optionally
  // raised to j if LOW is set; in every non-zero case the value is rounded down to a multiple
  // of SIMDSIZE.
1531  const size_t ibegin( ( IsLower_v<MT4> )
1532  ?( ( IsStrictlyLower_v<MT4> )
1533  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1534  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1535  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
  // One past the last row to update. For a (strictly) upper A the end derives from the largest
  // index j4; otherwise it is j+1 if UPP is set or the number of rows of A.
1536  const size_t iend( ( IsUpper_v<MT4> )
1537  ?( ( IsStrictlyUpper_v<MT4> )
1538  ?( UPP ? max(j+1UL,j4) : j4 )
1539  :( UPP ? max(j,j4)+1UL : j4+1UL ) )
1540  :( UPP ? j+1UL : A.rows() ) );
1541  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1542 
  // End of the SIMD loop: iend rounded down to a multiple of SIMDSIZE if a scalar tail is
  // required, otherwise iend itself
1543  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1544  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1545 
1546  size_t i( ibegin );
1547 
  // SIMD part: SIMDSIZE rows of column j per iteration
1548  for( ; i<ipos; i+=SIMDSIZE ) {
1549  C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1550  }
  // Scalar tail: continues at the row where the SIMD loop stopped (only if remainder is true)
1551  for( ; remainder && i<iend; ++i ) {
1552  C(i,j) -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1553  }
1554  }
1555 
  // Leftover loop: the remaining (fewer than four) non-zero elements of column j, one at a time
1556  for( ; element!=end; ++element )
1557  {
1558  const size_t j1( element->index() );
1559  const ET2 v1( element->value() );
1560 
1561  const SIMDType xmm1( set( v1 ) );
1562 
  // Same row bounds as in the main loop, with j1 serving as both smallest and largest index
1563  const size_t ibegin( ( IsLower_v<MT4> )
1564  ?( ( IsStrictlyLower_v<MT4> )
1565  ?( ( LOW ? max(j,j1+1UL) : j1+1UL ) & size_t(-SIMDSIZE) )
1566  :( ( LOW ? max(j,j1) : j1 ) & size_t(-SIMDSIZE) ) )
1567  :( LOW ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1568  const size_t iend( ( IsUpper_v<MT4> )
1569  ?( ( IsStrictlyUpper_v<MT4> )
1570  ?( UPP ? max(j+1UL,j1) : j1 )
1571  :( UPP ? max(j,j1)+1UL : j1+1UL ) )
1572  :( UPP ? j+1UL : A.rows() ) );
1573  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1574 
1575  const size_t ipos( remainder ? ( iend & size_t(-SIMDSIZE) ) : iend );
1576  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % (SIMDSIZE) ) ) == ipos, "Invalid end calculation" );
1577 
1578  size_t i( ibegin );
1579 
  // SIMD part followed by the optional scalar tail
1580  for( ; i<ipos; i+=SIMDSIZE ) {
1581  C.store( i, j, C.load(i,j) - A.load(i,j1) * xmm1 );
1582  }
1583  for( ; remainder && i<iend; ++i ) {
1584  C(i,j) -= A(i,j1) * v1;
1585  }
1586  }
1587  }
1588  }
1590  //**********************************************************************************************
1591 
1592  //**Subtraction assignment to sparse matrices***************************************************
1593  // No special implementation for the subtraction assignment to sparse matrices.
1594  //**********************************************************************************************
1595 
1596  //**Schur product assignment to dense matrices**************************************************
1609  template< typename MT // Type of the target dense matrix
1610  , bool SO > // Storage order of the target dense matrix
1611  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1612  {
1614 
1618 
1619  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1620  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1621 
1622  const ResultType tmp( serial( rhs ) );
1623  schurAssign( ~lhs, tmp );
1624  }
1626  //**********************************************************************************************
1627 
1628  //**Schur product assignment to sparse matrices*************************************************
1629  // No special implementation for the Schur product assignment to sparse matrices.
1630  //**********************************************************************************************
1631 
1632  //**Multiplication assignment to dense matrices*************************************************
1633  // No special implementation for the multiplication assignment to dense matrices.
1634  //**********************************************************************************************
1635 
1636  //**Multiplication assignment to sparse matrices************************************************
1637  // No special implementation for the multiplication assignment to sparse matrices.
1638  //**********************************************************************************************
1639 
1640  //**SMP assignment to dense matrices************************************************************
1655  template< typename MT // Type of the target dense matrix
1656  , bool SO > // Storage order of the target dense matrix
1657  friend inline auto smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1658  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1659  {
1661 
1662  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1663  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1664 
1665  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1666  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1667 
1668  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1669  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1670  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1671  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1672  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1673  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1674 
1675  smpAssign( ~lhs, A * B );
1676  }
1678  //**********************************************************************************************
1679 
1680  //**SMP assignment to sparse matrices***********************************************************
1695  template< typename MT // Type of the target sparse matrix
1696  , bool SO > // Storage order of the target sparse matrix
1697  friend inline auto smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1698  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1699  {
1701 
1702  using TmpType = If_t< SO, ResultType, OppositeType >;
1703 
1710 
1711  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1712  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1713 
1714  const ForwardFunctor fwd;
1715 
1716  const TmpType tmp( rhs );
1717  smpAssign( ~lhs, fwd( tmp ) );
1718  }
1720  //**********************************************************************************************
1721 
1722  //**SMP addition assignment to dense matrices***************************************************
1737  template< typename MT // Type of the target dense matrix
1738  , bool SO > // Storage order of the target dense matrix
1739  friend inline auto smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1740  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1741  {
1743 
1744  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1745  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1746 
1747  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1748  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1749 
1750  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1751  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1752  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1753  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1754  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1755  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1756 
1757  smpAddAssign( ~lhs, A * B );
1758  }
1760  //**********************************************************************************************
1761 
1762  //**SMP addition assignment to sparse matrices**************************************************
1763  // No special implementation for the SMP addition assignment to sparse matrices.
1764  //**********************************************************************************************
1765 
1766  //**SMP subtraction assignment to dense matrices************************************************
1781  template< typename MT // Type of the target dense matrix
1782  , bool SO > // Storage order of the target dense matrix
1783  friend inline auto smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1784  -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1785  {
1787 
1788  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1789  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1790 
1791  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1792  RT B( rhs.rhs_ ); // Evaluation of the right-hand side sparse matrix operand
1793 
1794  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1795  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1796  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1797  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1798  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1799  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1800 
1801  smpSubAssign( ~lhs, A * B );
1802  }
1804  //**********************************************************************************************
1805 
1806  //**SMP subtraction assignment to sparse matrices***********************************************
1807  // No special implementation for the SMP subtraction assignment to sparse matrices.
1808  //**********************************************************************************************
1809 
1810  //**SMP Schur product assignment to dense matrices**********************************************
1823  template< typename MT // Type of the target dense matrix
1824  , bool SO > // Storage order of the target dense matrix
1825  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const TDMatTSMatMultExpr& rhs )
1826  {
1828 
1832 
1833  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1834  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1835 
1836  const ResultType tmp( rhs );
1837  smpSchurAssign( ~lhs, tmp );
1838  }
1840  //**********************************************************************************************
1841 
1842  //**SMP Schur product assignment to sparse matrices*********************************************
1843  // No special implementation for the SMP Schur product assignment to sparse matrices.
1844  //**********************************************************************************************
1845 
1846  //**SMP multiplication assignment to dense matrices*********************************************
1847  // No special implementation for the SMP multiplication assignment to dense matrices.
1848  //**********************************************************************************************
1849 
1850  //**SMP multiplication assignment to sparse matrices********************************************
1851  // No special implementation for the SMP multiplication assignment to sparse matrices.
1852  //**********************************************************************************************
1853 
1854  //**Compile time checks*************************************************************************
1863  //**********************************************************************************************
1864 };
1865 //*************************************************************************************************
1866 
1867 
1868 
1869 
1870 //=================================================================================================
1871 //
1872 // GLOBAL BINARY ARITHMETIC OPERATORS
1873 //
1874 //=================================================================================================
1875 
1876 //*************************************************************************************************
1889 template< typename MT1 // Type of the left-hand side dense matrix
1890  , typename MT2 // Type of the right-hand side sparse matrix
1891  , DisableIf_t< ( IsIdentity_v<MT2> &&
1892  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > ) ||
1893  IsZero_v<MT2> >* = nullptr >
1894 inline const TDMatTSMatMultExpr<MT1,MT2,false,false,false,false>
1895  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1896 {
1898 
1899  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1900 
1901  return TDMatTSMatMultExpr<MT1,MT2,false,false,false,false>( ~lhs, ~rhs );
1902 }
1904 //*************************************************************************************************
1905 
1906 
1907 //*************************************************************************************************
1921 template< typename MT1 // Type of the left-hand side dense matrix
1922  , typename MT2 // Type of the right-hand side sparse matrix
1923  , EnableIf_t< IsIdentity_v<MT2> &&
1924  IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > >* = nullptr >
1925 inline const MT1&
1926  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1927 {
1929 
1930  MAYBE_UNUSED( rhs );
1931 
1932  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1933 
1934  return (~lhs);
1935 }
1937 //*************************************************************************************************
1938 
1939 
1940 //*************************************************************************************************
1953 template< typename MT1 // Type of the left-hand side dense matrix
1954  , typename MT2 // Type of the right-hand side sparse matrix
1955  , EnableIf_t< IsZero_v<MT2> >* = nullptr >
1956 inline decltype(auto)
1957  tdmattsmatmult( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
1958 {
1960 
1961  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == (~rhs).rows(), "Invalid matrix sizes" );
1962 
1963  using ReturnType = const MultTrait_t< ResultType_t<MT1>, ResultType_t<MT2> >;
1964 
1966  BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE( ReturnType );
1967 
1968  return ReturnType( (~lhs).rows(), (~rhs).columns() );
1969 }
1971 //*************************************************************************************************
1972 
1973 
1974 //*************************************************************************************************
2003 template< typename MT1 // Type of the left-hand side dense matrix
2004  , typename MT2 > // Type of the right-hand side sparse matrix
2005 inline decltype(auto)
2006  operator*( const DenseMatrix<MT1,true>& lhs, const SparseMatrix<MT2,true>& rhs )
2007 {
2009 
2010  if( (~lhs).columns() != (~rhs).rows() ) {
2011  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
2012  }
2013 
2014  return tdmattsmatmult( ~lhs, ~rhs );
2015 }
2016 //*************************************************************************************************
2017 
2018 
2019 
2020 
2021 //=================================================================================================
2022 //
2023 // GLOBAL FUNCTIONS
2024 //
2025 //=================================================================================================
2026 
2027 //*************************************************************************************************
// Declares the given dense matrix/sparse matrix multiplication expression \a dm as symmetric.
//
// \param dm The multiplication expression to be declared symmetric.
// \return A TDMatTSMatMultExpr over the same two operands with the symmetry flag set to \c true;
//         the HF, LF and UF flags are carried over unchanged.
// \exception std::invalid_argument Invalid symmetric matrix specification (\a dm is not square).
2051 template< typename MT1 // Type of the left-hand side dense matrix
2052  , typename MT2 // Type of the right-hand side sparse matrix
2053  , bool SF // Symmetry flag
2054  , bool HF // Hermitian flag
2055  , bool LF // Lower flag
2056  , bool UF > // Upper flag
2057 inline decltype(auto) declsym( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2058 {
2060 
// Only the shape is verified here; the element values are not inspected.
2061  if( !isSquare( dm ) ) {
2062  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
2063  }
2064 
// Rebuild the expression from its operands, changing only the SF template argument.
2065  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,true,HF,LF,UF>;
2066  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2067 }
2069 //*************************************************************************************************
2070 
2071 
2072 //*************************************************************************************************
// Declares the given dense matrix/sparse matrix multiplication expression \a dm as Hermitian.
//
// \param dm The multiplication expression to be declared Hermitian.
// \return A TDMatTSMatMultExpr over the same two operands with the Hermitian flag set to
//         \c true; the SF, LF and UF flags are carried over unchanged.
// \exception std::invalid_argument Invalid Hermitian matrix specification (\a dm is not square).
2096 template< typename MT1 // Type of the left-hand side dense matrix
2097  , typename MT2 // Type of the right-hand side sparse matrix
2098  , bool SF // Symmetry flag
2099  , bool HF // Hermitian flag
2100  , bool LF // Lower flag
2101  , bool UF > // Upper flag
2102 inline decltype(auto) declherm( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2103 {
2105 
// Only the shape is verified here; the element values are not inspected.
2106  if( !isSquare( dm ) ) {
2107  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
2108  }
2109 
// Rebuild the expression from its operands, changing only the HF template argument.
2110  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,true,LF,UF>;
2111  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2112 }
2114 //*************************************************************************************************
2115 
2116 
2117 //*************************************************************************************************
// Declares the given dense matrix/sparse matrix multiplication expression \a dm as lower.
//
// \param dm The multiplication expression to be declared lower.
// \return A TDMatTSMatMultExpr over the same two operands with the lower flag set to \c true;
//         the SF, HF and UF flags are carried over unchanged.
// \exception std::invalid_argument Invalid lower matrix specification (\a dm is not square).
2141 template< typename MT1 // Type of the left-hand side dense matrix
2142  , typename MT2 // Type of the right-hand side sparse matrix
2143  , bool SF // Symmetry flag
2144  , bool HF // Hermitian flag
2145  , bool LF // Lower flag
2146  , bool UF > // Upper flag
2147 inline decltype(auto) decllow( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2148 {
2150 
// Only the shape is verified here; the element values are not inspected.
2151  if( !isSquare( dm ) ) {
2152  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
2153  }
2154 
// Rebuild the expression from its operands, changing only the LF template argument.
2155  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,true,UF>;
2156  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2157 }
2159 //*************************************************************************************************
2160 
2161 
2162 //*************************************************************************************************
// Declares the given dense matrix/sparse matrix multiplication expression \a dm as upper.
//
// \param dm The multiplication expression to be declared upper.
// \return A TDMatTSMatMultExpr over the same two operands with the upper flag set to \c true;
//         the SF, HF and LF flags are carried over unchanged.
// \exception std::invalid_argument Invalid upper matrix specification (\a dm is not square).
2186 template< typename MT1 // Type of the left-hand side dense matrix
2187  , typename MT2 // Type of the right-hand side sparse matrix
2188  , bool SF // Symmetry flag
2189  , bool HF // Hermitian flag
2190  , bool LF // Lower flag
2191  , bool UF > // Upper flag
2192 inline decltype(auto) declupp( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2193 {
2195 
// Only the shape is verified here; the element values are not inspected.
2196  if( !isSquare( dm ) ) {
2197  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
2198  }
2199 
// Rebuild the expression from its operands, changing only the UF template argument.
2200  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,true>;
2201  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2202 }
2204 //*************************************************************************************************
2205 
2206 
2207 //*************************************************************************************************
// Declares the given dense matrix/sparse matrix multiplication expression \a dm as diagonal.
//
// \param dm The multiplication expression to be declared diagonal.
// \return A TDMatTSMatMultExpr over the same two operands with both the lower and the upper
//         flag set to \c true; the SF and HF flags are carried over unchanged.
// \exception std::invalid_argument Invalid diagonal matrix specification (\a dm is not square).
2231 template< typename MT1 // Type of the left-hand side dense matrix
2232  , typename MT2 // Type of the right-hand side sparse matrix
2233  , bool SF // Symmetry flag
2234  , bool HF // Hermitian flag
2235  , bool LF // Lower flag
2236  , bool UF > // Upper flag
2237 inline decltype(auto) decldiag( const TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2238 {
2240 
// Only the shape is verified here; the element values are not inspected.
2241  if( !isSquare( dm ) ) {
2242  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
2243  }
2244 
// Diagonal is expressed as "lower and upper at the same time": LF and UF are both forced to true.
2245  using ReturnType = const TDMatTSMatMultExpr<MT1,MT2,SF,HF,true,true>;
2246  return ReturnType( dm.leftOperand(), dm.rightOperand() );
2247 }
2249 //*************************************************************************************************
2250 
2251 
2252 
2253 
2254 //=================================================================================================
2255 //
2256 // SIZE SPECIALIZATIONS
2257 //
2258 //=================================================================================================
2259 
2260 //*************************************************************************************************
// Size trait specialization for index 0 of a TDMatTSMatMultExpr: the value is inherited from
// the left-hand side operand type MT1 (presumably the compile-time row count - index meaning
// is defined by the primary Size template, which is not part of this file).
2262 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2263 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2264  : public Size<MT1,0UL>
2265 {};
2266 
// Size trait specialization for index 1 of a TDMatTSMatMultExpr: the value is inherited from
// the right-hand side operand type MT2 (presumably the compile-time column count - index
// meaning is defined by the primary Size template, which is not part of this file).
2267 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2268 struct Size< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2269  : public Size<MT2,1UL>
2270 {};
2272 //*************************************************************************************************
2273 
2274 
2275 
2276 
2277 //=================================================================================================
2278 //
2279 // ISALIGNED SPECIALIZATIONS
2280 //
2281 //=================================================================================================
2282 
2283 //*************************************************************************************************
// IsAligned trait specialization for TDMatTSMatMultExpr: the expression inherits the trait
// value of its left-hand side dense operand type MT1; the right-hand side sparse operand
// type MT2 is not consulted.
2285 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2286 struct IsAligned< TDMatTSMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2287  : public IsAligned<MT1>
2288 {};
2290 //*************************************************************************************************
2291 
2292 } // namespace blaze
2293 
2294 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
static constexpr bool evaluateRight
Compilation switch for the composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:151
Header file for auxiliary alias declarations.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:133
Header file for the generic min algorithm.
Header file for the blaze::checked and blaze::unchecked instances.
Header file for the decldiag trait.
RightOperand rhs_
Right-hand side sparse matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:464
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:412
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:975
CompositeType_t< MT2 > CT2
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:141
Header file for basic type definitions.
ResultType_t< MT2 > RT2
Result type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:137
typename If< Condition, T1, T2 >::Type If_t
Auxiliary alias template for the If class template.The If_t alias template provides a convenient shor...
Definition: If.h:109
ElementType_t< RT2 > ET2
Element type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:139
Header file for the declherm trait.
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: TDMatTSMatMultExpr.h:260
Expression object for transpose dense matrix-transpose sparse matrix multiplications....
Definition: Forward.h:174
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTSMatMultExpr.h:402
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions.The ResultType_t alias declaration provides ...
Definition: Aliases.h:390
Header file for the serial shim.
Header file for the IsDiagonal type trait.
Base template for the DeclUppTrait class.
Definition: DeclUppTrait.h:134
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type,...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:372
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTSMatMultExpr.h:436
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:595
static constexpr bool SYM
Flag for symmetric matrices.
Definition: TDMatTSMatMultExpr.h:155
constexpr Unchecked unchecked
Global Unchecked instance.The blaze::unchecked instance is an optional token for the creation of view...
Definition: Check.h:138
Constraint on the data type.
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_t alias declaration provid...
Definition: SIMDTrait.h:315
ElementType_t< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:138
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:154
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatTSMatMultExpr.h:424
Header file for the MAYBE_UNUSED function template.
Header file for the IsIdentity type trait.
typename If_t< HERM, DeclHermTrait< MultTrait_t< RT1, RT2 > >, If_t< SYM, DeclSymTrait< MultTrait_t< RT1, RT2 > >, If_t< LOW, If_t< UPP, DeclDiagTrait< MultTrait_t< RT1, RT2 > >, DeclLowTrait< MultTrait_t< RT1, RT2 > > >, If_t< UPP, DeclUppTrait< MultTrait_t< RT1, RT2 > >, MultTrait< RT1, RT2 > > > > >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:256
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1001
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Header file for the reset shim.
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
static constexpr bool LOW
Flag for lower matrices.
Definition: TDMatTSMatMultExpr.h:157
constexpr size_t columns(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of columns of the matrix.
Definition: Matrix.h:514
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes....
Definition: DenseMatrix.h:81
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes....
Definition: Forward.h:145
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions.The ElementType_t alias declaration provide...
Definition: Aliases.h:170
Constraint on the data type.
Constraint on the data type.
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template.The EnableIf_t alias declaration provides a convenient...
Definition: EnableIf.h:138
Header file for the generic max algorithm.
Header file for the DisableIf class template.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: TDMatTSMatMultExpr.h:392
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: TDMatTSMatMultExpr.h:382
Namespace of the Blaze C++ math library.
Definition: Blaze.h:58
Header file for the DeclLow functor.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is not a zero vector or matrix type,...
Definition: Zero.h:61
TDMatTSMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the TDMatTSMatMultExpr class.
Definition: TDMatTSMatMultExpr.h:302
Generic wrapper for the decllow() function.
Definition: DeclLow.h:59
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1162
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:262
Header file for the decllow trait.
static constexpr bool evaluateLeft
Compilation switch for the composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:146
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
ResultType_t< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:136
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatTSMatMultExpr.h:261
Header file for all SIMD functionality.
static constexpr bool UPP
Flag for upper matrices.
Definition: TDMatTSMatMultExpr.h:158
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1001
Header file for the IsLower type trait.
constexpr void MAYBE_UNUSED(const Args &...)
Suppression of unused parameter warnings.
Definition: MaybeUnused.h:81
Header file for the IsAligned type trait.
Generic wrapper for the null function.
Definition: Noop.h:60
Header file for the IsTriangular type trait.
Base template for the DeclSymTrait class.
Definition: DeclSymTrait.h:134
Constraints on the storage order of matrix types.
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: TDMatTSMatMultExpr.h:280
Header file for the exception macros of the math module.
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1198
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:438
Header file for the DeclDiag functor.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:317
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: TDMatTSMatMultExpr.h:287
Constraint on the data type.
CompositeType_t< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:140
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the IsPadded type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:103
typename T::OppositeType OppositeType_t
Alias declaration for nested OppositeType type definitions.The OppositeType_t alias declaration provi...
Definition: Aliases.h:270
Header file for the conjugate shim.
Header file for the declupp trait.
Header file for the IsSIMDCombinable type trait.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
If_t< IsExpression_v< MT2 >, const MT2, const MT2 & > RightOperand
Composite type of the right-hand side sparse matrix expression.
Definition: TDMatTSMatMultExpr.h:269
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions.The TransposeType_t alias declaration pro...
Definition: Aliases.h:470
Header file for run time assertion macros.
Base template for the DeclHermTrait class.
Definition: DeclHermTrait.h:134
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.The CompositeType_t alias declaration pro...
Definition: Aliases.h:90
Base template for the MultTrait class.
Definition: MultTrait.h:146
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:133
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:259
Header file for the IsZero type trait.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:295
Header file for the declsym trait.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
Header file for all forward declarations for expression class templates.
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1002
Header file for the isDefault shim.
BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v< T > &&HasSize_v< T, 1UL >, If_t< IsSigned_v< T >, SIMDint8, SIMDuint8 > > set(T value) noexcept
Sets all values in the vector to the given 1-byte integral value.
Definition: Set.h:75
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:59
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:808
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:194
constexpr size_t rows(const Matrix< MT, SO > &matrix) noexcept
Returns the current number of rows of the matrix.
Definition: Matrix.h:498
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:59
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: TDMatTSMatMultExpr.h:293
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTSMatMultExpr.h:366
Base template for the DeclLowTrait class.
Definition: DeclLowTrait.h:134
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1002
If_t< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTSMatMultExpr.h:272
If_t< IsExpression_v< MT1 >, const MT1, const MT1 & > LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTSMatMultExpr.h:266
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
Header file for the IntegralConstant class template.
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:59
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTSMatMultExpr.h:463
If_t< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side sparse matrix operand.
Definition: TDMatTSMatMultExpr.h:275
Header file for the DeclHerm functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTSMatMultExpr.h:456
bool isDefault(const DiagonalProxy< MT > &proxy)
Returns whether the represented element is in default state.
Definition: DiagonalProxy.h:635
Header file for the IsUpper type trait.
typename DisableIf< Condition, T >::Type DisableIf_t
Auxiliary type for the DisableIf class template.The DisableIf_t alias declaration provides a convenie...
Definition: DisableIf.h:138
static constexpr bool HERM
Flag for Hermitian matrices.
Definition: TDMatTSMatMultExpr.h:156
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1324
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:59
Base template for the DeclDiagTrait class.
Definition: DeclDiagTrait.h:134
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTSMatMultExpr.h:263
bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:951
Header file for the IsResizable type trait.
OppositeType_t< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTSMatMultExpr.h:258
Header file for the Size type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type.In case the given data type T is a zero vector or matrix type,...
Definition: Zero.h:81
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression,...
Definition: Assert.h:101
Header file for the DeclSym functor.
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a sparse, N-dimensional matrix type,...
Definition: SparseMatrix.h:61
Header file for the IsExpression type trait class.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTSMatMultExpr.h:446
Header file for the function trace functionality.