Blaze 3.9
SMatDMatMultExpr.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_SMATDMATMULTEXPR_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <blaze/math/Aliases.h>
67#include <blaze/math/SIMD.h>
98#include <blaze/util/Assert.h>
99#include <blaze/util/EnableIf.h>
103#include <blaze/util/mpl/If.h>
104#include <blaze/util/Types.h>
106
107
108namespace blaze {
109
110//=================================================================================================
111//
112// CLASS SMATDMATMULTEXPR
113//
114//=================================================================================================
115
116//*************************************************************************************************
123template< typename MT1 // Type of the left-hand side sparse matrix
124 , typename MT2 // Type of the right-hand side dense matrix
125 , bool SF // Symmetry flag
126 , bool HF // Hermitian flag
127 , bool LF // Lower flag
128 , bool UF > // Upper flag
130 : public MatMatMultExpr< DenseMatrix< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
131 , private Computation
132{
133 private:
134 //**Type definitions****************************************************************************
141 //**********************************************************************************************
142
143 //**********************************************************************************************
145 static constexpr bool evaluateLeft = ( IsComputation_v<MT1> || RequiresEvaluation_v<MT1> );
146 //**********************************************************************************************
147
148 //**********************************************************************************************
150 static constexpr bool evaluateRight = ( IsComputation_v<MT2> || RequiresEvaluation_v<MT2> );
151 //**********************************************************************************************
152
153 //**********************************************************************************************
154 static constexpr bool SYM = ( SF && !( HF || LF || UF ) );
155 static constexpr bool HERM = ( HF && !( LF || UF ) );
156 static constexpr bool LOW = ( LF || ( ( SF || HF ) && UF ) );
157 static constexpr bool UPP = ( UF || ( ( SF || HF ) && LF ) );
158 //**********************************************************************************************
159
160 //**********************************************************************************************
162
166 template< typename T1, typename T2, typename T3 >
167 static constexpr bool IsEvaluationRequired_v = ( evaluateLeft || evaluateRight );
169 //**********************************************************************************************
170
171 //**********************************************************************************************
173
176 template< typename T1, typename T2, typename T3 >
177 static constexpr bool UseVectorizedKernel_v =
178 ( useOptimizedKernels &&
179 !IsDiagonal_v<T3> &&
180 T1::simdEnabled && T3::simdEnabled &&
181 IsRowMajorMatrix_v<T1> &&
182 IsSIMDCombinable_v< ElementType_t<T1>
184 , ElementType_t<T3> > &&
185 HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
186 HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
188 //**********************************************************************************************
189
190 //**********************************************************************************************
192
196 template< typename T1, typename T2, typename T3 >
197 static constexpr bool UseOptimizedKernel_v =
198 ( useOptimizedKernels &&
199 !UseVectorizedKernel_v<T1,T2,T3> &&
200 !IsDiagonal_v<T3> &&
201 !IsResizable_v< ElementType_t<T1> > &&
202 !IsResizable_v<ET1> );
204 //**********************************************************************************************
205
206 //**********************************************************************************************
208
211 template< typename T1, typename T2, typename T3 >
212 static constexpr bool UseDefaultKernel_v =
213 ( !UseVectorizedKernel_v<T1,T2,T3> &&
214 !UseOptimizedKernel_v<T1,T2,T3> );
216 //**********************************************************************************************
217
218 //**********************************************************************************************
220
223 using ForwardFunctor = If_t< HERM
224 , DeclHerm
225 , If_t< SYM
226 , DeclSym
227 , If_t< LOW
228 , If_t< UPP
229 , DeclDiag
230 , DeclLow >
231 , If_t< UPP
232 , DeclUpp
233 , Noop > > > >;
235 //**********************************************************************************************
236
237 public:
238 //**Type definitions****************************************************************************
241
244
// Result type of the multiplication expression. It is selected by the same HERM / SYM / LOW /
// UPP cascade that chooses the ForwardFunctor; without any structure flag it falls back to
// MultTrait<RT1,RT2>.
// NOTE(review): this listing is incomplete. The embedded numbering jumps (246 -> 248 -> 250,
// 251 -> 254 -> 256), so the branch types of the cascade are missing and the alias does not
// parse as shown. Restore the missing lines from the upstream header before building.
246 using ResultType = typename If_t< HERM
248 , If_t< SYM
250 , If_t< LOW
251 , If_t< UPP
254 , If_t< UPP
256 , MultTrait<RT1,RT2> > > > >::Type;
257
// Type returned by the element access functions operator() and at(): a constant element.
262 using ReturnType = const ElementType;
// Alias of the constant result type of the expression.
263 using CompositeType = const ResultType;
264
// Storage type of the left-hand side operand: expressions are held by value, everything else
// by reference-to-const.
266 using LeftOperand = If_t< IsExpression_v<MT1>, const MT1, const MT1& >;
267
// Storage type of the right-hand side operand: expressions are held by value, everything else
// by reference-to-const.
269 using RightOperand = If_t< IsExpression_v<MT2>, const MT2, const MT2& >;
270
273
276 //**********************************************************************************************
277
278 //**Compilation flags***************************************************************************
280 static constexpr bool simdEnabled =
281 ( !IsDiagonal_v<MT2> &&
282 MT2::simdEnabled &&
283 HasSIMDAdd_v<ET1,ET2> &&
284 HasSIMDMult_v<ET1,ET2> );
285
287 static constexpr bool smpAssignable =
288 ( !evaluateLeft && MT1::smpAssignable && !evaluateRight && MT2::smpAssignable );
289 //**********************************************************************************************
290
291 //**SIMD properties*****************************************************************************
// SIMD width taken from SIMDTrait<ElementType>::size. The vectorized kernels advance their
// column loops by this step.
293 static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
294 //**********************************************************************************************
295
296 //**Constructor*********************************************************************************
302 inline SMatDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
303 : lhs_( lhs ) // Left-hand side sparse matrix of the multiplication expression
304 , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
305 {
306 BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
307 }
308 //**********************************************************************************************
309
310 //**Access operator*****************************************************************************
317 inline ReturnType operator()( size_t i, size_t j ) const {
318 BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
319 BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
320
321 if( IsDiagonal_v<MT1> ) {
322 return lhs_(i,i) * rhs_(i,j);
323 }
324 else if( IsDiagonal_v<MT2> ) {
325 return lhs_(i,j) * rhs_(j,j);
326 }
327 else if( IsTriangular_v<MT1> || IsTriangular_v<MT2> ) {
328 const size_t begin( ( IsUpper_v<MT1> )
329 ?( ( IsLower_v<MT2> )
330 ?( max( ( IsStrictlyUpper_v<MT1> ? i+1UL : i )
331 , ( IsStrictlyLower_v<MT2> ? j+1UL : j ) ) )
332 :( IsStrictlyUpper_v<MT1> ? i+1UL : i ) )
333 :( ( IsLower_v<MT2> )
334 ?( IsStrictlyLower_v<MT2> ? j+1UL : j )
335 :( 0UL ) ) );
336 const size_t end( ( IsLower_v<MT1> )
337 ?( ( IsUpper_v<MT2> )
338 ?( min( ( IsStrictlyLower_v<MT1> ? i : i+1UL )
339 , ( IsStrictlyUpper_v<MT2> ? j : j+1UL ) ) )
340 :( IsStrictlyLower_v<MT1> ? i : i+1UL ) )
341 :( ( IsUpper_v<MT2> )
342 ?( IsStrictlyUpper_v<MT2> ? j : j+1UL )
343 :( lhs_.columns() ) ) );
344
345 if( begin >= end ) return ElementType();
346
347 const size_t n( end - begin );
348
349 return subvector( row( lhs_, i, unchecked ), begin, n, unchecked ) *
351 }
352 else {
353 return row( lhs_, i, unchecked ) * column( rhs_, j, unchecked );
354 }
355 }
356 //**********************************************************************************************
357
358 //**At function*********************************************************************************
366 inline ReturnType at( size_t i, size_t j ) const {
367 if( i >= lhs_.rows() ) {
368 BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
369 }
370 if( j >= rhs_.columns() ) {
371 BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
372 }
373 return (*this)(i,j);
374 }
375 //**********************************************************************************************
376
377 //**Rows function*******************************************************************************
382 inline size_t rows() const noexcept {
383 return lhs_.rows();
384 }
385 //**********************************************************************************************
386
387 //**Columns function****************************************************************************
392 inline size_t columns() const noexcept {
393 return rhs_.columns();
394 }
395 //**********************************************************************************************
396
397 //**Left operand access*************************************************************************
402 inline LeftOperand leftOperand() const noexcept {
403 return lhs_;
404 }
405 //**********************************************************************************************
406
407 //**Right operand access************************************************************************
412 inline RightOperand rightOperand() const noexcept {
413 return rhs_;
414 }
415 //**********************************************************************************************
416
417 //**********************************************************************************************
423 template< typename T >
424 inline bool canAlias( const T* alias ) const noexcept {
425 return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
426 }
427 //**********************************************************************************************
428
429 //**********************************************************************************************
435 template< typename T >
436 inline bool isAliased( const T* alias ) const noexcept {
437 return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
438 }
439 //**********************************************************************************************
440
441 //**********************************************************************************************
446 inline bool isAligned() const noexcept {
447 return rhs_.isAligned();
448 }
449 //**********************************************************************************************
450
451 //**********************************************************************************************
456 inline bool canSMPAssign() const noexcept {
457 return ( rows() * columns() >= SMP_SMATDMATMULT_THRESHOLD ) && !IsDiagonal_v<MT2>;
458 }
459 //**********************************************************************************************
460
461 private:
462 //**Member variables****************************************************************************
465 //**********************************************************************************************
466
467 //**Assignment to dense matrices****************************************************************
480 template< typename MT // Type of the target dense matrix
481 , bool SO > // Storage order of the target dense matrix
482 friend inline void assign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
483 {
485
486 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
487 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
488
489 LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
490 RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
491
492 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
493 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
494 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
495 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
496 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
497 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
498
499 SMatDMatMultExpr::selectAssignKernel( *lhs, A, B );
500 }
502 //**********************************************************************************************
503
504 //**Default assignment to dense matrices********************************************************
518 template< typename MT3 // Type of the left-hand side target matrix
519 , typename MT4 // Type of the left-hand side matrix operand
520 , typename MT5 > // Type of the right-hand side matrix operand
521 static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
523 {
524 const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
525
526 reset( C );
527
528 for( size_t jj=0UL; jj<B.columns(); jj+=block )
529 {
530 const size_t jtmp( min( jj+block, B.columns() ) );
531
532 for( size_t i=0UL; i<A.rows(); ++i )
533 {
534 auto element( A.begin(i) );
535 const auto end( A.end(i) );
536
537 for( ; element!=end; ++element )
538 {
539 const size_t i1( element->index() );
540
541 if( IsDiagonal_v<MT5> )
542 {
543 C(i,i1) = element->value() * B(i1,i1);
544 }
545 else
546 {
547 const size_t jbegin( ( IsUpper_v<MT5> )
548 ?( ( UPP )
549 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
550 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
551 :( jj ) );
552 const size_t jend( ( IsLower_v<MT5> )
553 ?( ( SYM || HERM || LOW )
554 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
555 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
556 :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
557
558 if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
559 continue;
560
561 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
562
563 for( size_t j=jbegin; j<jend; ++j ) {
564 if( isDefault( C(i,j) ) )
565 C(i,j) = element->value() * B(i1,j);
566 else
567 C(i,j) += element->value() * B(i1,j);
568 }
569 }
570 }
571 }
572 }
573
574 if( SYM || HERM ) {
575 for( size_t i=0UL; i<A.rows(); ++i ) {
576 for( size_t j=i+1UL; j<B.columns(); ++j ) {
577 C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
578 }
579 }
580 }
581 }
583 //**********************************************************************************************
584
585 //**Optimized assignment to dense matrices******************************************************
// Optimized assignment kernel (C = A*B), enabled only if UseOptimizedKernel_v holds.
//
// C is reset first. For every row i of A the non-zero elements are consumed four at a time and
// their contributions v_k * B(i_k,j) are accumulated into C(i,j) in one pass; the remaining
// (fewer than four) non-zeros of the row are handled one by one. In both phases the column
// loop itself is unrolled four times with a scalar tail. For SYM and HERM only columns up to
// the diagonal are computed and the strictly upper part is mirrored at the end.
599 template< typename MT3 // Type of the left-hand side target matrix
600 , typename MT4 // Type of the left-hand side matrix operand
601 , typename MT5 > // Type of the right-hand side matrix operand
602 static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
603 -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
604 {
// Column blocking: the whole width for row-major targets, 64 columns otherwise
605 const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
606
607 reset( C );
608
609 for( size_t jj=0UL; jj<B.columns(); jj+=block )
610 {
// End of the current column block, clamped to the number of columns of B
611 const size_t jtmp( min( jj+block, B.columns() ) );
612
613 for( size_t i=0UL; i<A.rows(); ++i )
614 {
615 const auto end( A.end(i) );
616 auto element( A.begin(i) );
617
// kpos: number of non-zeros of row i handled by the 4-way unrolled loop below
// (presumably the largest multiple of 4 not exceeding nonzeros; see the assertion)
618 const size_t nonzeros( A.nonZeros(i) );
619 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
620 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
621
622 for( size_t k=0UL; k<kpos; k+=4UL )
623 {
// Fetch index/value of four consecutive non-zeros; the iterator is advanced past all
// four before any 'continue' below, so skipping the update never re-reads them
624 const size_t i1( element->index() );
625 const ET1 v1( element->value() );
626 ++element;
627 const size_t i2( element->index() );
628 const ET1 v2( element->value() );
629 ++element;
630 const size_t i3( element->index() );
631 const ET1 v3( element->value() );
632 ++element;
633 const size_t i4( element->index() );
634 const ET1 v4( element->value() );
635 ++element;
636
637 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
638
// Column range for the group of four: the lower bound is derived from the smallest index
// (i1), the upper bound from the largest index (i4)
639 const size_t jbegin( ( IsUpper_v<MT5> )
640 ?( ( UPP )
641 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
642 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
643 :( UPP ? max(i,jj) : jj ) );
644 const size_t jend( ( IsLower_v<MT5> )
645 ?( ( SYM || HERM || LOW )
646 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
647 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
648 :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
649
650 if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
651 continue;
652
653 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
654
// jpos: end of the part of [jbegin,jend) covered by the 4-way unrolled column loop
655 const size_t jnum( jend - jbegin );
656 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
657 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
658
659 for( size_t j=jbegin; j<jpos; j+=4UL ) {
660 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
661 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
662 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
663 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
664 }
// Scalar tail for the columns not covered by the unrolled loop
665 for( size_t j=jpos; j<jend; ++j ) {
666 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
667 }
668 }
669
// Remaining non-zeros of row i (fewer than four), processed one at a time
670 for( ; element!=end; ++element )
671 {
672 const size_t i1( element->index() );
673 const ET1 v1( element->value() );
674
675 const size_t jbegin( ( IsUpper_v<MT5> )
676 ?( ( UPP )
677 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
678 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
679 :( UPP ? max(i,jj) : jj ) );
680 const size_t jend( ( IsLower_v<MT5> )
681 ?( ( SYM || HERM || LOW )
682 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
683 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
684 :( SYM || HERM || LOW ? min(i+1UL,jtmp) : jtmp ) );
685
686 if( ( SYM || HERM || LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
687 continue;
688
689 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
690
691 const size_t jnum( jend - jbegin );
692 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
693 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
694
695 for( size_t j=jbegin; j<jpos; j+=4UL ) {
696 C(i,j ) += v1 * B(i1,j );
697 C(i,j+1UL) += v1 * B(i1,j+1UL);
698 C(i,j+2UL) += v1 * B(i1,j+2UL);
699 C(i,j+3UL) += v1 * B(i1,j+3UL);
700 }
701 for( size_t j=jpos; j<jend; ++j ) {
702 C(i,j) += v1 * B(i1,j);
703 }
704 }
705 }
706 }
707
// Symmetric/Hermitian products: copy (HERM: conjugate) the lower part into the upper part
708 if( SYM || HERM ) {
709 for( size_t i=0UL; i<A.rows(); ++i ) {
710 for( size_t j=i+1UL; j<B.columns(); ++j ) {
711 C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
712 }
713 }
714 }
715 }
717 //**********************************************************************************************
718
719 //**Vectorized assignment to dense matrices*****************************************************
// Vectorized assignment kernel (C = A*B), enabled only if UseVectorizedKernel_v holds (which,
// among other things, requires a row-major target).
//
// C is reset first. For every row i of A the non-zero elements are consumed four at a time
// (then one at a time for the rest of the row). Each value is placed into a SIMDType via
// set() and combined with SIMD loads of the matching rows of B; the result is accumulated
// into row i of C in steps of SIMDSIZE columns. Unlike the scalar kernels there is no column
// blocking. For SYM and HERM only columns up to the diagonal are computed and the strictly
// upper part is mirrored at the end.
733 template< typename MT3 // Type of the left-hand side target matrix
734 , typename MT4 // Type of the left-hand side matrix operand
735 , typename MT5 > // Type of the right-hand side matrix operand
736 static inline auto selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
737 -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
738 {
// A scalar tail loop is required unless both the target and the right operand are padded
739 constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
740
741 reset( C );
742
743 for( size_t i=0UL; i<A.rows(); ++i )
744 {
745 const auto end( A.end(i) );
746 auto element( A.begin(i) );
747
// kpos: number of non-zeros of row i handled by the 4-way unrolled loop below
748 const size_t nonzeros( A.nonZeros(i) );
749 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
750 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
751
752 for( size_t k=0UL; k<kpos; k+=4UL )
753 {
754 const size_t i1( element->index() );
755 const ET1 v1( element->value() );
756 ++element;
757 const size_t i2( element->index() );
758 const ET1 v2( element->value() );
759 ++element;
760 const size_t i3( element->index() );
761 const ET1 v3( element->value() );
762 ++element;
763 const size_t i4( element->index() );
764 const ET1 v4( element->value() );
765 ++element;
766
767 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
768
// SIMD operands built from the four scalar values (presumably a broadcast of each value
// to all lanes -- confirm against the definition of set())
769 const SIMDType xmm1( set( v1 ) );
770 const SIMDType xmm2( set( v2 ) );
771 const SIMDType xmm3( set( v3 ) );
772 const SIMDType xmm4( set( v4 ) );
773
// jbegin is passed through prevMultiple(..., SIMDSIZE) so that the SIMD loop starts at a
// SIMDSIZE-based position; jend is derived from the largest index (i4) and the declared
// structure of the product
774 const size_t jbegin( ( IsUpper_v<MT5> )
775 ?( ( IsStrictlyUpper_v<MT5> )
776 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
777 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
778 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
779 const size_t jend( ( IsLower_v<MT5> )
780 ?( ( IsStrictlyLower_v<MT5> )
781 ?( SYM || HERM || LOW ? min(i+1UL,i4) : i4 )
782 :( SYM || HERM || LOW ? min(i,i4)+1UL : i4+1UL ) )
783 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
784 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
785
// End of the SIMD loop: with a scalar tail it stops at prevMultiple(jend, SIMDSIZE),
// otherwise it runs up to jend
786 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
787 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
788
789 size_t j( jbegin );
790
791 for( ; j<jpos; j+=SIMDSIZE ) {
792 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
793 }
// Scalar tail for the columns not covered by the SIMD loop
794 for( ; remainder && j<jend; ++j ) {
795 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
796 }
797 }
798
// Remaining non-zeros of row i (fewer than four), processed one at a time
799 for( ; element!=end; ++element )
800 {
801 const size_t i1( element->index() );
802 const ET1 v1( element->value() );
803
804 const SIMDType xmm1( set( v1 ) );
805
806 const size_t jbegin( ( IsUpper_v<MT5> )
807 ?( ( IsStrictlyUpper_v<MT5> )
808 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
809 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
810 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
811 const size_t jend( ( IsLower_v<MT5> )
812 ?( ( IsStrictlyLower_v<MT5> )
813 ?( SYM || HERM || LOW ? min(i+1UL,i1) : i1 )
814 :( SYM || HERM || LOW ? min(i,i1)+1UL : i1+1UL ) )
815 :( SYM || HERM || LOW ? i+1UL : B.columns() ) );
816 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
817
818 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
819 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
820
821 size_t j( jbegin );
822
823 for( ; j<jpos; j+=SIMDSIZE ) {
824 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
825 }
826 for( ; remainder && j<jend; ++j ) {
827 C(i,j) += v1 * B(i1,j);
828 }
829 }
830 }
831
// Symmetric/Hermitian products: copy (HERM: conjugate) the lower part into the upper part
832 if( SYM || HERM ) {
833 for( size_t i=0UL; i<A.rows(); ++i ) {
834 for( size_t j=i+1UL; j<B.columns(); ++j ) {
835 C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
836 }
837 }
838 }
839 }
841 //**********************************************************************************************
842
843 //**Assignment to sparse matrices***************************************************************
856 template< typename MT // Type of the target sparse matrix
857 , bool SO > // Storage order of the target sparse matrix
858 friend inline void assign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
859 {
861
862 using TmpType = If_t< SO, OppositeType, ResultType >;
863
870
871 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
872 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
873
874 const ForwardFunctor fwd;
875
876 const TmpType tmp( serial( rhs ) );
877 assign( *lhs, fwd( tmp ) );
878 }
880 //**********************************************************************************************
881
882 //**Addition assignment to dense matrices*******************************************************
895 template< typename MT // Type of the target dense matrix
896 , bool SO > // Storage order of the target dense matrix
897 friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
898 {
900
901 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
902 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
903
904 LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
905 RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
906
907 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
908 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
909 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
910 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
911 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
912 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
913
914 SMatDMatMultExpr::selectAddAssignKernel( *lhs, A, B );
915 }
917 //**********************************************************************************************
918
919 //**Default addition assignment to dense matrices***********************************************
933 template< typename MT3 // Type of the left-hand side target matrix
934 , typename MT4 // Type of the left-hand side matrix operand
935 , typename MT5 > // Type of the right-hand side matrix operand
936 static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
937 -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
938 {
939 const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
940
941 for( size_t jj=0UL; jj<B.columns(); jj+=block )
942 {
943 const size_t jtmp( min( jj+block, B.columns() ) );
944
945 for( size_t i=0UL; i<A.rows(); ++i )
946 {
947 const auto end( A.end(i) );
948 auto element( A.begin(i) );
949
950 for( ; element!=end; ++element )
951 {
952 const size_t i1( element->index() );
953
954 if( IsDiagonal_v<MT5> )
955 {
956 C(i,i1) += element->value() * B(i1,i1);
957 }
958 else
959 {
960 const size_t jbegin( ( IsUpper_v<MT5> )
961 ?( ( UPP )
962 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
963 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
964 :( jj ) );
965 const size_t jend( ( IsLower_v<MT5> )
966 ?( ( LOW )
967 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
968 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
969 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
970
971 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
972 continue;
973
974 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
975
976 const size_t jnum( jend - jbegin );
977 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
978 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
979
980 for( size_t j=jbegin; j<jpos; j+=4UL ) {
981 C(i,j ) += element->value() * B(i1,j );
982 C(i,j+1UL) += element->value() * B(i1,j+1UL);
983 C(i,j+2UL) += element->value() * B(i1,j+2UL);
984 C(i,j+3UL) += element->value() * B(i1,j+3UL);
985 }
986 for( size_t j=jpos; j<jend; ++j ) {
987 C(i,j) += element->value() * B(i1,j);
988 }
989 }
990 }
991 }
992 }
993 }
995 //**********************************************************************************************
996
997 //**Optimized addition assignment to dense matrices*********************************************
// Optimized addition assignment kernel (C += A*B), enabled only if UseOptimizedKernel_v holds.
//
// For every row i of A the non-zero elements are consumed four at a time and their
// contributions v_k * B(i_k,j) are added to C(i,j) in one pass; the remaining (fewer than
// four) non-zeros of the row are handled one by one. In both phases the column loop itself is
// unrolled four times with a scalar tail. The target is not reset and, in contrast to the
// assignment kernels, only the LOW and UPP flags narrow the column range.
1011 template< typename MT3 // Type of the left-hand side target matrix
1012 , typename MT4 // Type of the left-hand side matrix operand
1013 , typename MT5 > // Type of the right-hand side matrix operand
1014 static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1015 -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1016 {
// Column blocking: the whole width for row-major targets, 64 columns otherwise
1017 const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1018
1019 for( size_t jj=0UL; jj<B.columns(); jj+=block )
1020 {
// End of the current column block, clamped to the number of columns of B
1021 const size_t jtmp( min( jj+block, B.columns() ) );
1022
1023 for( size_t i=0UL; i<A.rows(); ++i )
1024 {
1025 const auto end( A.end(i) );
1026 auto element( A.begin(i) );
1027
// kpos: number of non-zeros of row i handled by the 4-way unrolled loop below
1028 const size_t nonzeros( A.nonZeros(i) );
1029 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
1030 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
1031
1032 for( size_t k=0UL; k<kpos; k+=4UL )
1033 {
// Fetch index/value of four consecutive non-zeros; the iterator is advanced past all
// four before any 'continue' below
1034 const size_t i1( element->index() );
1035 const ET1 v1( element->value() );
1036 ++element;
1037 const size_t i2( element->index() );
1038 const ET1 v2( element->value() );
1039 ++element;
1040 const size_t i3( element->index() );
1041 const ET1 v3( element->value() );
1042 ++element;
1043 const size_t i4( element->index() );
1044 const ET1 v4( element->value() );
1045 ++element;
1046
1047 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1048
// Column range for the group of four: the lower bound is derived from the smallest index
// (i1), the upper bound from the largest index (i4)
1049 const size_t jbegin( ( IsUpper_v<MT5> )
1050 ?( ( UPP )
1051 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1052 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1053 :( UPP ? max(i,jj) : jj ) );
1054 const size_t jend( ( IsLower_v<MT5> )
1055 ?( ( LOW )
1056 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1057 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1058 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1059
1060 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1061 continue;
1062
1063 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1064
// jpos: end of the part of [jbegin,jend) covered by the 4-way unrolled column loop
1065 const size_t jnum( jend - jbegin );
1066 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
1067 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
1068
1069 for( size_t j=jbegin; j<jpos; j+=4UL ) {
1070 C(i,j ) += v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1071 C(i,j+1UL) += v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1072 C(i,j+2UL) += v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1073 C(i,j+3UL) += v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1074 }
// Scalar tail for the columns not covered by the unrolled loop
1075 for( size_t j=jpos; j<jend; ++j ) {
1076 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1077 }
1078 }
1079
// Remaining non-zeros of row i (fewer than four), processed one at a time
1080 for( ; element!=end; ++element )
1081 {
1082 const size_t i1( element->index() );
1083 const ET1 v1( element->value() );
1084
1085 const size_t jbegin( ( IsUpper_v<MT5> )
1086 ?( ( UPP )
1087 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1088 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1089 :( UPP ? max(i,jj) : jj ) );
1090 const size_t jend( ( IsLower_v<MT5> )
1091 ?( ( LOW )
1092 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1093 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1094 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1095
1096 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1097 continue;
1098
1099 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1100
1101 const size_t jnum( jend - jbegin );
1102 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
1103 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
1104
1105 for( size_t j=jbegin; j<jpos; j+=4UL ) {
1106 C(i,j ) += v1 * B(i1,j );
1107 C(i,j+1UL) += v1 * B(i1,j+1UL);
1108 C(i,j+2UL) += v1 * B(i1,j+2UL);
1109 C(i,j+3UL) += v1 * B(i1,j+3UL);
1110 }
1111 for( size_t j=jpos; j<jend; ++j ) {
1112 C(i,j) += v1 * B(i1,j);
1113 }
1114 }
1115 }
1116 }
1117 }
1119 //**********************************************************************************************
1120
1121 //**Vectorized addition assignment to dense matrices********************************************
// Vectorized kernel for the addition assignment C += A * B. It is selected (via EnableIf_t) only
// when UseVectorizedKernel_v<MT3,MT4,MT5> holds. A is traversed row by row through its non-zero
// elements; B and C are accessed with load()/store() plus a scalar tail loop where required.
// NOTE(review): UPP, LOW, SIMDSIZE, SIMDType and ET1 are declared outside this excerpt; UPP/LOW
// presumably flag a result that was declared upper/lower - confirm against the class header.
1135 template< typename MT3 // Type of the left-hand side target matrix
1136 , typename MT4 // Type of the left-hand side matrix operand
1137 , typename MT5 > // Type of the right-hand side matrix operand
1138 static inline auto selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1139 -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1140 {
// A scalar tail loop is only required if C or B is not padded.
1141 constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1142
1143 for( size_t i=0UL; i<A.rows(); ++i )
1144 {
1145 const auto end( A.end(i) );
1146 auto element( A.begin(i) );
1147
// The non-zero elements of row i are consumed in groups of four; kpos is the number of
// elements covered by complete groups (prevMultiple presumably rounds down to a multiple of 4).
1148 const size_t nonzeros( A.nonZeros(i) );
1149 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
1150 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
1151
1152 for( size_t k=0UL; k<kpos; k+=4UL )
1153 {
// Fetch four consecutive non-zero elements (column index in A = row index in B, plus value).
1154 const size_t i1( element->index() );
1155 const ET1 v1( element->value() );
1156 ++element;
1157 const size_t i2( element->index() );
1158 const ET1 v2( element->value() );
1159 ++element;
1160 const size_t i3( element->index() );
1161 const ET1 v3( element->value() );
1162 ++element;
1163 const size_t i4( element->index() );
1164 const ET1 v4( element->value() );
1165 ++element;
1166
// The indices are expected in strictly ascending order: i1 is the smallest, i4 the largest.
1167 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1168
// Broadcast each scalar factor into a SIMD vector.
1169 const SIMDType xmm1( set( v1 ) );
1170 const SIMDType xmm2( set( v2 ) );
1171 const SIMDType xmm3( set( v3 ) );
1172 const SIMDType xmm4( set( v4 ) );
1173
// First column to process: for a (strictly) upper B the range starts at the smallest row
// index i1 (i1+1), additionally raised to i when UPP is set; the result is passed through
// prevMultiple so that the SIMD loop starts at a multiple of SIMDSIZE.
1174 const size_t jbegin( ( IsUpper_v<MT5> )
1175 ?( ( IsStrictlyUpper_v<MT5> )
1176 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
1177 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
1178 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
// One past the last column to process: for a (strictly) lower B the range ends at the largest
// row index i4 (exclusive if strictly lower, inclusive otherwise), additionally limited to
// row i (inclusive) when LOW is set.
1179 const size_t jend( ( IsLower_v<MT5> )
1180 ?( ( IsStrictlyLower_v<MT5> )
1181 ?( LOW ? min(i+1UL,i4) : i4 )
1182 :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1183 :( LOW ? i+1UL : B.columns() ) );
1184 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1185
// End of the SIMD loop: with a scalar tail it is jend passed through prevMultiple, otherwise jend.
1186 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1187 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1188
1189 size_t j( jbegin );
1190
// SIMD part: add the four scaled rows of B to row i of C, SIMDSIZE columns per iteration.
1191 for( ; j<jpos; j+=SIMDSIZE ) {
1192 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) + xmm2 * B.load(i2,j) + xmm3 * B.load(i3,j) + xmm4 * B.load(i4,j) );
1193 }
// Scalar tail for the columns in [jpos, jend); skipped entirely when both matrices are padded.
1194 for( ; remainder && j<jend; ++j ) {
1195 C(i,j) += v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1196 }
1197 }
1198
// Remaining (fewer than four) non-zero elements of row i are handled one at a time with the
// same column-range logic, using i1 for both the lower and the upper limit.
1199 for( ; element!=end; ++element )
1200 {
1201 const size_t i1( element->index() );
1202 const ET1 v1( element->value() );
1203
1204 const SIMDType xmm1( set( v1 ) );
1205
1206 const size_t jbegin( ( IsUpper_v<MT5> )
1207 ?( ( IsStrictlyUpper_v<MT5> )
1208 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
1209 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
1210 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
1211 const size_t jend( ( IsLower_v<MT5> )
1212 ?( ( IsStrictlyLower_v<MT5> )
1213 ?( LOW ? min(i+1UL,i1) : i1 )
1214 :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1215 :( LOW ? i+1UL : B.columns() ) );
1216 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1217
1218 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1219 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1220
1221 size_t j( jbegin );
1222
1223 for( ; j<jpos; j+=SIMDSIZE ) {
1224 C.store( i, j, C.load(i,j) + xmm1 * B.load(i1,j) );
1225 }
1226 for( ; remainder && j<jend; ++j ) {
1227 C(i,j) += v1 * B(i1,j);
1228 }
1229 }
1230 }
1231 }
1233 //**********************************************************************************************
1234
1235 //**Addition assignment to sparse matrices******************************************************
1236 // No special implementation for the addition assignment to sparse matrices.
1237 //**********************************************************************************************
1238
1239 //**Subtraction assignment to dense matrices****************************************************
1252 template< typename MT // Type of the target dense matrix
1253 , bool SO > // Storage order of the target dense matrix
1254 friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1255 {
1257
1258 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1259 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1260
1261 LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side sparse matrix operand
1262 RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1263
1264 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1265 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1266 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1267 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1268 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
1269 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
1270
1271 SMatDMatMultExpr::selectSubAssignKernel( *lhs, A, B );
1272 }
1274 //**********************************************************************************************
1275
1276 //**Default subtraction assignment to dense matrices********************************************
1290 template< typename MT3 // Type of the left-hand side target matrix
1291 , typename MT4 // Type of the left-hand side matrix operand
1292 , typename MT5 > // Type of the right-hand side matrix operand
1293 static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1294 -> EnableIf_t< UseDefaultKernel_v<MT3,MT4,MT5> >
1295 {
1296 const size_t block( IsRowMajorMatrix_v<MT3> || IsDiagonal_v<MT5> ? B.columns() : 64UL );
1297
1298 for( size_t jj=0UL; jj<B.columns(); jj+=block )
1299 {
1300 const size_t jtmp( min( jj+block, B.columns() ) );
1301
1302 for( size_t i=0UL; i<A.rows(); ++i )
1303 {
1304 const auto end( A.end(i) );
1305 auto element( A.begin(i) );
1306
1307 for( ; element!=end; ++element )
1308 {
1309 const size_t i1( element->index() );
1310
1311 if( IsDiagonal_v<MT5> )
1312 {
1313 C(i,i1) -= element->value() * B(i1,i1);
1314 }
1315 else
1316 {
1317 const size_t jbegin( ( IsUpper_v<MT5> )
1318 ?( ( UPP )
1319 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1320 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1321 :( jj ) );
1322 const size_t jend( ( IsLower_v<MT5> )
1323 ?( ( LOW )
1324 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1325 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1326 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1327
1328 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1329 continue;
1330
1331 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1332
1333 const size_t jnum( jend - jbegin );
1334 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
1335 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
1336
1337 for( size_t j=jbegin; j<jpos; j+=4UL ) {
1338 C(i,j ) -= element->value() * B(i1,j );
1339 C(i,j+1UL) -= element->value() * B(i1,j+1UL);
1340 C(i,j+2UL) -= element->value() * B(i1,j+2UL);
1341 C(i,j+3UL) -= element->value() * B(i1,j+3UL);
1342 }
1343 for( size_t j=jpos; j<jend; ++j ) {
1344 C(i,j) -= element->value() * B(i1,j);
1345 }
1346 }
1347 }
1348 }
1349 }
1350 }
1352 //**********************************************************************************************
1353
1354 //**Optimized subtraction assignment to dense matrices******************************************
// Optimized (non-vectorized) kernel for the subtraction assignment C -= A * B. It is selected
// (via EnableIf_t) only when UseOptimizedKernel_v<MT3,MT4,MT5> holds. The non-zero elements of
// each row of A are processed in groups of four, and the column loop is manually unrolled by four.
// NOTE(review): UPP, LOW and ET1 are declared outside this excerpt; UPP/LOW presumably flag a
// result that was declared upper/lower - confirm against the class header.
1368 template< typename MT3 // Type of the left-hand side target matrix
1369 , typename MT4 // Type of the left-hand side matrix operand
1370 , typename MT5 > // Type of the right-hand side matrix operand
1371 static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1372 -> EnableIf_t< UseOptimizedKernel_v<MT3,MT4,MT5> >
1373 {
// Column blocking: a single block spanning all columns for a row-major target, blocks of 64
// columns otherwise.
1374 const size_t block( IsRowMajorMatrix_v<MT3> ? B.columns() : 64UL );
1375
1376 for( size_t jj=0UL; jj<B.columns(); jj+=block )
1377 {
// jtmp is one past the last column of the current block.
1378 const size_t jtmp( min( jj+block, B.columns() ) );
1379
1380 for( size_t i=0UL; i<A.rows(); ++i )
1381 {
1382 const auto end( A.end(i) );
1383 auto element( A.begin(i) );
1384
// kpos is the number of non-zero elements of row i covered by complete groups of four.
1385 const size_t nonzeros( A.nonZeros(i) );
1386 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
1387 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
1388
1389 for( size_t k=0UL; k<kpos; k+=4UL )
1390 {
// Fetch four consecutive non-zero elements; the iterator is advanced past all four here, so
// the 'continue' further down proceeds directly with the next group.
1391 const size_t i1( element->index() );
1392 const ET1 v1( element->value() );
1393 ++element;
1394 const size_t i2( element->index() );
1395 const ET1 v2( element->value() );
1396 ++element;
1397 const size_t i3( element->index() );
1398 const ET1 v3( element->value() );
1399 ++element;
1400 const size_t i4( element->index() );
1401 const ET1 v4( element->value() );
1402 ++element;
1403
// The indices are expected in strictly ascending order: i1 is the smallest, i4 the largest.
1404 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1405
// First column: start of the current block, raised to i1 (i1+1 if strictly upper) for an
// upper B and to i when UPP is set.
1406 const size_t jbegin( ( IsUpper_v<MT5> )
1407 ?( ( UPP )
1408 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1409 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1410 :( UPP ? max(i,jj) : jj ) );
// One past the last column: end of the current block, lowered to i4+1 (i4 if strictly lower)
// for a lower B and to i+1 when LOW is set.
1411 const size_t jend( ( IsLower_v<MT5> )
1412 ?( ( LOW )
1413 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) )
1414 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i4 : i4+1UL ) ) ) )
1415 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1416
// The restricted range can be empty for the current block; nothing to do in that case.
1417 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1418 continue;
1419
1420 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1421
// jpos marks the end of the part of [jbegin,jend) that is handled four columns at a time.
1422 const size_t jnum( jend - jbegin );
1423 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
1424 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
1425
1426 for( size_t j=jbegin; j<jpos; j+=4UL ) {
1427 C(i,j ) -= v1 * B(i1,j ) + v2 * B(i2,j ) + v3 * B(i3,j ) + v4 * B(i4,j );
1428 C(i,j+1UL) -= v1 * B(i1,j+1UL) + v2 * B(i2,j+1UL) + v3 * B(i3,j+1UL) + v4 * B(i4,j+1UL);
1429 C(i,j+2UL) -= v1 * B(i1,j+2UL) + v2 * B(i2,j+2UL) + v3 * B(i3,j+2UL) + v4 * B(i4,j+2UL);
1430 C(i,j+3UL) -= v1 * B(i1,j+3UL) + v2 * B(i2,j+3UL) + v3 * B(i3,j+3UL) + v4 * B(i4,j+3UL);
1431 }
// Remaining (fewer than four) columns.
1432 for( size_t j=jpos; j<jend; ++j ) {
1433 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1434 }
1435 }
1436
// Remaining (fewer than four) non-zero elements of row i, handled one at a time with the same
// column-range logic, using i1 for both limits.
1437 for( ; element!=end; ++element )
1438 {
1439 const size_t i1( element->index() );
1440 const ET1 v1( element->value() );
1441
1442 const size_t jbegin( ( IsUpper_v<MT5> )
1443 ?( ( UPP )
1444 ?( max( i, jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) )
1445 :( max( jj, ( IsStrictlyUpper_v<MT5> ? i1+1UL : i1 ) ) ) )
1446 :( UPP ? max(i,jj) : jj ) );
1447 const size_t jend( ( IsLower_v<MT5> )
1448 ?( ( LOW )
1449 ?( min( i+1UL, jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) )
1450 :( min( jtmp, ( IsStrictlyLower_v<MT5> ? i1 : i1+1UL ) ) ) )
1451 :( LOW ? min(i+1UL,jtmp) : jtmp ) );
1452
1453 if( ( LOW || UPP || IsTriangular_v<MT5> ) && ( jbegin >= jend ) )
1454 continue;
1455
1456 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1457
1458 const size_t jnum( jend - jbegin );
1459 const size_t jpos( jbegin + prevMultiple( jnum, 4UL ) );
1460 BLAZE_INTERNAL_ASSERT( jpos <= jbegin+jnum, "Invalid end calculation" );
1461
1462 for( size_t j=jbegin; j<jpos; j+=4UL ) {
1463 C(i,j ) -= v1 * B(i1,j );
1464 C(i,j+1UL) -= v1 * B(i1,j+1UL);
1465 C(i,j+2UL) -= v1 * B(i1,j+2UL);
1466 C(i,j+3UL) -= v1 * B(i1,j+3UL);
1467 }
1468 for( size_t j=jpos; j<jend; ++j ) {
1469 C(i,j) -= v1 * B(i1,j);
1470 }
1471 }
1472 }
1473 }
1474 }
1476 //**********************************************************************************************
1477
1478 //**Vectorized subtraction assignment to dense matrices*****************************************
// Vectorized kernel for the subtraction assignment C -= A * B. It is selected (via EnableIf_t)
// only when UseVectorizedKernel_v<MT3,MT4,MT5> holds. A is traversed row by row through its
// non-zero elements; B and C are accessed with load()/store() plus a scalar tail loop where
// required.
// NOTE(review): UPP, LOW, SIMDSIZE, SIMDType and ET1 are declared outside this excerpt; UPP/LOW
// presumably flag a result that was declared upper/lower - confirm against the class header.
1492 template< typename MT3 // Type of the left-hand side target matrix
1493 , typename MT4 // Type of the left-hand side matrix operand
1494 , typename MT5 > // Type of the right-hand side matrix operand
1495 static inline auto selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1496 -> EnableIf_t< UseVectorizedKernel_v<MT3,MT4,MT5> >
1497 {
// A scalar tail loop is only required if C or B is not padded.
1498 constexpr bool remainder( !IsPadded_v<MT3> || !IsPadded_v<MT5> );
1499
1500 for( size_t i=0UL; i<A.rows(); ++i )
1501 {
1502 const auto end( A.end(i) );
1503 auto element( A.begin(i) );
1504
// kpos is the number of non-zero elements of row i covered by complete groups of four.
1505 const size_t nonzeros( A.nonZeros(i) );
1506 const size_t kpos( prevMultiple( nonzeros, 4UL ) );
1507 BLAZE_INTERNAL_ASSERT( kpos <= nonzeros, "Invalid end calculation" );
1508
1509 for( size_t k=0UL; k<kpos; k+=4UL )
1510 {
// Fetch four consecutive non-zero elements (column index in A = row index in B, plus value).
1511 const size_t i1( element->index() );
1512 const ET1 v1( element->value() );
1513 ++element;
1514 const size_t i2( element->index() );
1515 const ET1 v2( element->value() );
1516 ++element;
1517 const size_t i3( element->index() );
1518 const ET1 v3( element->value() );
1519 ++element;
1520 const size_t i4( element->index() );
1521 const ET1 v4( element->value() );
1522 ++element;
1523
// The indices are expected in strictly ascending order: i1 is the smallest, i4 the largest.
1524 BLAZE_INTERNAL_ASSERT( i1 < i2 && i2 < i3 && i3 < i4, "Invalid sparse matrix index detected" );
1525
// Broadcast each scalar factor into a SIMD vector.
1526 const SIMDType xmm1( set( v1 ) );
1527 const SIMDType xmm2( set( v2 ) );
1528 const SIMDType xmm3( set( v3 ) );
1529 const SIMDType xmm4( set( v4 ) );
1530
// First column to process: for a (strictly) upper B the range starts at the smallest row
// index i1 (i1+1), additionally raised to i when UPP is set; the result is passed through
// prevMultiple so that the SIMD loop starts at a multiple of SIMDSIZE.
1531 const size_t jbegin( ( IsUpper_v<MT5> )
1532 ?( ( IsStrictlyUpper_v<MT5> )
1533 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
1534 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
1535 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
// One past the last column to process: for a (strictly) lower B the range ends at the largest
// row index i4 (exclusive if strictly lower, inclusive otherwise), additionally limited to
// row i (inclusive) when LOW is set.
1536 const size_t jend( ( IsLower_v<MT5> )
1537 ?( ( IsStrictlyLower_v<MT5> )
1538 ?( LOW ? min(i+1UL,i4) : i4 )
1539 :( LOW ? min(i,i4)+1UL : i4+1UL ) )
1540 :( LOW ? i+1UL : B.columns() ) );
1541 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1542
// End of the SIMD loop: with a scalar tail it is jend passed through prevMultiple, otherwise jend.
1543 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1544 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1545
1546 size_t j( jbegin );
1547
// SIMD part: subtract the four scaled rows of B from row i of C, SIMDSIZE columns per iteration.
1548 for( ; j<jpos; j+=SIMDSIZE ) {
1549 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) - xmm2 * B.load(i2,j) - xmm3 * B.load(i3,j) - xmm4 * B.load(i4,j) );
1550 }
// Scalar tail for the columns in [jpos, jend); skipped entirely when both matrices are padded.
1551 for( ; remainder && j<jend; ++j ) {
1552 C(i,j) -= v1 * B(i1,j) + v2 * B(i2,j) + v3 * B(i3,j) + v4 * B(i4,j);
1553 }
1554 }
1555
// Remaining (fewer than four) non-zero elements of row i are handled one at a time with the
// same column-range logic, using i1 for both the lower and the upper limit.
1556 for( ; element!=end; ++element )
1557 {
1558 const size_t i1( element->index() );
1559 const ET1 v1( element->value() );
1560
1561 const SIMDType xmm1( set( v1 ) );
1562
1563 const size_t jbegin( ( IsUpper_v<MT5> )
1564 ?( ( IsStrictlyUpper_v<MT5> )
1565 ?( prevMultiple( ( UPP ? max(i,i1+1UL) : i1+1UL ), SIMDSIZE ) )
1566 :( prevMultiple( ( UPP ? max(i,i1) : i1 ), SIMDSIZE ) ) )
1567 :( UPP ? prevMultiple( i, SIMDSIZE ) : 0UL ) );
1568 const size_t jend( ( IsLower_v<MT5> )
1569 ?( ( IsStrictlyLower_v<MT5> )
1570 ?( LOW ? min(i+1UL,i1) : i1 )
1571 :( LOW ? min(i,i1)+1UL : i1+1UL ) )
1572 :( LOW ? i+1UL : B.columns() ) );
1573 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1574
1575 const size_t jpos( remainder ? prevMultiple( jend, SIMDSIZE ) : jend );
1576 BLAZE_INTERNAL_ASSERT( jpos <= jend, "Invalid end calculation" );
1577
1578 size_t j( jbegin );
1579
1580 for( ; j<jpos; j+=SIMDSIZE ) {
1581 C.store( i, j, C.load(i,j) - xmm1 * B.load(i1,j) );
1582 }
1583 for( ; remainder && j<jend; ++j ) {
1584 C(i,j) -= v1 * B(i1,j);
1585 }
1586 }
1587 }
1588 }
1590 //**********************************************************************************************
1591
1592 //**Subtraction assignment to sparse matrices***************************************************
1593 // No special implementation for the subtraction assignment to sparse matrices.
1594 //**********************************************************************************************
1595
1596 //**Schur product assignment to dense matrices**************************************************
1609 template< typename MT // Type of the target dense matrix
1610 , bool SO > // Storage order of the target dense matrix
1611 friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1612 {
1614
1618
1619 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1620 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1621
1622 const ResultType tmp( serial( rhs ) );
1623 schurAssign( *lhs, tmp );
1624 }
1626 //**********************************************************************************************
1627
1628 //**Schur product assignment to sparse matrices*************************************************
1629 // No special implementation for the Schur product assignment to sparse matrices.
1630 //**********************************************************************************************
1631
1632 //**Multiplication assignment to dense matrices*************************************************
1633 // No special implementation for the multiplication assignment to dense matrices.
1634 //**********************************************************************************************
1635
1636 //**Multiplication assignment to sparse matrices************************************************
1637 // No special implementation for the multiplication assignment to sparse matrices.
1638 //**********************************************************************************************
1639
1640 //**SMP assignment to dense matrices************************************************************
1655 template< typename MT // Type of the target dense matrix
1656 , bool SO > // Storage order of the target dense matrix
1657 friend inline auto smpAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1658 -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1659 {
1661
1662 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1663 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1664
1665 LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1666 RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1667
1668 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1669 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1670 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1671 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1672 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
1673 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
1674
1675 smpAssign( *lhs, A * B );
1676 }
1678 //**********************************************************************************************
1679
1680 //**SMP assignment to sparse matrices***********************************************************
1695 template< typename MT // Type of the target sparse matrix
1696 , bool SO > // Storage order of the target sparse matrix
1697 friend inline auto smpAssign( SparseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1698 -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1699 {
1701
1702 using TmpType = If_t< SO, OppositeType, ResultType >;
1703
1710
1711 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1712 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1713
1714 const ForwardFunctor fwd;
1715
1716 const TmpType tmp( rhs );
1717 smpAssign( *lhs, fwd( tmp ) );
1718 }
1720 //**********************************************************************************************
1721
1722 //**SMP addition assignment to dense matrices***************************************************
1738 template< typename MT // Type of the target dense matrix
1739 , bool SO > // Storage order of the target dense matrix
1740 friend inline auto smpAddAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1741 -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1742 {
1744
1745 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1746 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1747
1748 LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1749 RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1750
1751 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1752 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1753 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1754 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1755 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
1756 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
1757
1758 smpAddAssign( *lhs, A * B );
1759 }
1761 //**********************************************************************************************
1762
1763 //**SMP addition assignment to sparse matrices**************************************************
1764 // No special implementation for the SMP addition assignment to sparse matrices.
1765 //**********************************************************************************************
1766
1767 //**SMP subtraction assignment to dense matrices************************************************
1783 template< typename MT // Type of the target dense matrix
1784 , bool SO > // Storage order of the target dense matrix
1785 friend inline auto smpSubAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1786 -> EnableIf_t< IsEvaluationRequired_v<MT,MT1,MT2> >
1787 {
1789
1790 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1791 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1792
1793 LT A( rhs.lhs_ ); // Evaluation of the left-hand side sparse matrix operand
1794 RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1795
1796 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1797 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1798 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1799 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1800 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).rows() , "Invalid number of rows" );
1801 BLAZE_INTERNAL_ASSERT( B.columns() == (*lhs).columns() , "Invalid number of columns" );
1802
1803 smpSubAssign( *lhs, A * B );
1804 }
1806 //**********************************************************************************************
1807
1808 //**SMP subtraction assignment to sparse matrices***********************************************
1809 // No special implementation for the SMP subtraction assignment to sparse matrices.
1810 //**********************************************************************************************
1811
1812 //**SMP Schur product assignment to dense matrices**********************************************
1825 template< typename MT // Type of the target dense matrix
1826 , bool SO > // Storage order of the target dense matrix
1827 friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const SMatDMatMultExpr& rhs )
1828 {
1830
1834
1835 BLAZE_INTERNAL_ASSERT( (*lhs).rows() == rhs.rows() , "Invalid number of rows" );
1836 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == rhs.columns(), "Invalid number of columns" );
1837
1838 const ResultType tmp( rhs );
1839 smpSchurAssign( *lhs, tmp );
1840 }
1842 //**********************************************************************************************
1843
1844 //**SMP Schur product assignment to sparse matrices*********************************************
1845 // No special implementation for the SMP Schur product assignment to sparse matrices.
1846 //**********************************************************************************************
1847
1848 //**SMP multiplication assignment to dense matrices*********************************************
1849 // No special implementation for the SMP multiplication assignment to dense matrices.
1850 //**********************************************************************************************
1851
1852 //**SMP multiplication assignment to sparse matrices********************************************
1853 // No special implementation for the SMP multiplication assignment to sparse matrices.
1854 //**********************************************************************************************
1855
1856 //**Compile time checks*************************************************************************
1865 //**********************************************************************************************
1866};
1867//*************************************************************************************************
1868
1869
1870
1871
1872//=================================================================================================
1873//
1874// GLOBAL BINARY ARITHMETIC OPERATORS
1875//
1876//=================================================================================================
1877
1878//*************************************************************************************************
1891template< typename MT1 // Type of the left-hand side dense matrix
1892 , typename MT2 // Type of the right-hand side sparse matrix
1893 , DisableIf_t< ( IsIdentity_v<MT1> &&
1894 IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > ) ||
1895 IsZero_v<MT1> >* = nullptr >
1896inline const SMatDMatMultExpr<MT1,MT2,false,false,false,false>
1897 smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1898{
1900
1901 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).rows(), "Invalid matrix sizes" );
1902
1903 return SMatDMatMultExpr<MT1,MT2,false,false,false,false>( *lhs, *rhs );
1904}
1906//*************************************************************************************************
1907
1908
1909//*************************************************************************************************
1923template< typename MT1 // Type of the left-hand side sparse matrix
1924 , typename MT2 // Type of the right-hand side dense matrix
1925 , EnableIf_t< IsIdentity_v<MT1> &&
1926 IsSame_v< ElementType_t<MT1>, ElementType_t<MT2> > >* = nullptr >
1927inline const MT2&
1928 smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1929{
1931
1932 MAYBE_UNUSED( lhs );
1933
1934 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).rows(), "Invalid matrix sizes" );
1935
1936 return (*rhs);
1937}
1939//*************************************************************************************************
1940
1941
1942//*************************************************************************************************
1955template< typename MT1 // Type of the left-hand side dense matrix
1956 , typename MT2 // Type of the right-hand side sparse matrix
1957 , EnableIf_t< IsZero_v<MT1> >* = nullptr >
1958inline decltype(auto)
1959 smatdmatmult( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
1960{
1962
1963 BLAZE_INTERNAL_ASSERT( (*lhs).columns() == (*rhs).rows(), "Invalid matrix sizes" );
1964
1965 using ReturnType = const MultTrait_t< ResultType_t<MT1>, ResultType_t<MT2> >;
1966
1969
1970 return ReturnType( (*lhs).rows(), (*rhs).columns() );
1971}
1973//*************************************************************************************************
1974
1975
1976//*************************************************************************************************
2005template< typename MT1 // Type of the left-hand side sparse matrix
2006 , typename MT2 > // Type of the right-hand side dense matrix
2007inline decltype(auto)
2008 operator*( const SparseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,false>& rhs )
2009{
2011
2012 if( (*lhs).columns() != (*rhs).rows() ) {
2013 BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
2014 }
2015
2016 return smatdmatmult( *lhs, *rhs );
2017}
2018//*************************************************************************************************
2019
2020
2021
2022
2023//=================================================================================================
2024//
2025// GLOBAL FUNCTIONS
2026//
2027//=================================================================================================
2028
2029//*************************************************************************************************
2053template< typename MT1 // Type of the left-hand side sparse matrix
2054 , typename MT2 // Type of the right-hand side dense matrix
2055 , bool SF // Symmetry flag
2056 , bool HF // Hermitian flag
2057 , bool LF // Lower flag
2058 , bool UF > // Upper flag
2059inline decltype(auto) declsym( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2060{
2062
2063 if( !isSquare( dm ) ) {
2064 BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
2065 }
2066
2067 using ReturnType = const SMatDMatMultExpr<MT1,MT2,true,HF,LF,UF>;
2068 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2069}
2071//*************************************************************************************************
2072
2073
2074//*************************************************************************************************
2098template< typename MT1 // Type of the left-hand side sparse matrix
2099 , typename MT2 // Type of the right-hand side dense matrix
2100 , bool SF // Symmetry flag
2101 , bool HF // Hermitian flag
2102 , bool LF // Lower flag
2103 , bool UF > // Upper flag
2104inline decltype(auto) declherm( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2105{
2107
2108 if( !isSquare( dm ) ) {
2109 BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
2110 }
2111
2112 using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,true,LF,UF>;
2113 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2114}
2116//*************************************************************************************************
2117
2118
2119//*************************************************************************************************
2143template< typename MT1 // Type of the left-hand side dense matrix
2144 , typename MT2 // Type of the right-hand side dense matrix
2145 , bool SF // Symmetry flag
2146 , bool HF // Hermitian flag
2147 , bool LF // Lower flag
2148 , bool UF > // Upper flag
2149inline decltype(auto) decllow( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2150{
2152
2153 if( !isSquare( dm ) ) {
2154 BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
2155 }
2156
2157 using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,UF>;
2158 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2159}
2161//*************************************************************************************************
2162
2163
2164//*************************************************************************************************
2188template< typename MT1 // Type of the left-hand side dense matrix
2189 , typename MT2 // Type of the right-hand side dense matrix
2190 , bool SF // Symmetry flag
2191 , bool HF // Hermitian flag
2192 , bool UF > // Upper flag
2193inline decltype(auto) declunilow( const SMatDMatMultExpr<MT1,MT2,SF,HF,false,UF>& dm )
2194{
2196
2197 if( !isSquare( dm ) ) {
2198 BLAZE_THROW_INVALID_ARGUMENT( "Invalid unilower matrix specification" );
2199 }
2200
2201 return declunilow( decllow( *dm ) );
2202}
2204//*************************************************************************************************
2205
2206
2207//*************************************************************************************************
2231template< typename MT1 // Type of the left-hand side dense matrix
2232 , typename MT2 // Type of the right-hand side dense matrix
2233 , bool SF // Symmetry flag
2234 , bool HF // Hermitian flag
2235 , bool UF > // Upper flag
2236inline decltype(auto) declstrlow( const SMatDMatMultExpr<MT1,MT2,SF,HF,false,UF>& dm )
2237{
2239
2240 if( !isSquare( dm ) ) {
2241 BLAZE_THROW_INVALID_ARGUMENT( "Invalid strictly lower matrix specification" );
2242 }
2243
2244 return declstrlow( decllow( *dm ) );
2245}
2247//*************************************************************************************************
2248
2249
2250//*************************************************************************************************
2274template< typename MT1 // Type of the left-hand side dense matrix
2275 , typename MT2 // Type of the right-hand side dense matrix
2276 , bool SF // Symmetry flag
2277 , bool HF // Hermitian flag
2278 , bool LF // Lower flag
2279 , bool UF > // Upper flag
2280inline decltype(auto) declupp( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2281{
2283
2284 if( !isSquare( dm ) ) {
2285 BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
2286 }
2287
2288 using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,true>;
2289 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2290}
2292//*************************************************************************************************
2293
2294
2295//*************************************************************************************************
2319template< typename MT1 // Type of the left-hand side dense matrix
2320 , typename MT2 // Type of the right-hand side dense matrix
2321 , bool SF // Symmetry flag
2322 , bool HF // Hermitian flag
2323 , bool LF > // Lower flag
2324inline decltype(auto) decluniupp( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,false>& dm )
2325{
2327
2328 if( !isSquare( dm ) ) {
2329 BLAZE_THROW_INVALID_ARGUMENT( "Invalid uniupper matrix specification" );
2330 }
2331
2332 return decluniupp( declupp( *dm ) );
2333}
2335//*************************************************************************************************
2336
2337
2338//*************************************************************************************************
2362template< typename MT1 // Type of the left-hand side dense matrix
2363 , typename MT2 // Type of the right-hand side dense matrix
2364 , bool SF // Symmetry flag
2365 , bool HF // Hermitian flag
2366 , bool LF > // Lower flag
2367inline decltype(auto) declstrupp( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,false>& dm )
2368{
2370
2371 if( !isSquare( dm ) ) {
2372 BLAZE_THROW_INVALID_ARGUMENT( "Invalid strictly upper matrix specification" );
2373 }
2374
2375 return declstrupp( declupp( *dm ) );
2376}
2378//*************************************************************************************************
2379
2380
2381//*************************************************************************************************
2405template< typename MT1 // Type of the left-hand side dense matrix
2406 , typename MT2 // Type of the right-hand side dense matrix
2407 , bool SF // Symmetry flag
2408 , bool HF // Hermitian flag
2409 , bool LF // Lower flag
2410 , bool UF > // Upper flag
2411inline decltype(auto) decldiag( const SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
2412{
2414
2415 if( !isSquare( dm ) ) {
2416 BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
2417 }
2418
2419 using ReturnType = const SMatDMatMultExpr<MT1,MT2,SF,HF,true,true>;
2420 return ReturnType( dm.leftOperand(), dm.rightOperand() );
2421}
2423//*************************************************************************************************
2424
2425
2426
2427
2428//=================================================================================================
2429//
2430// SIZE SPECIALIZATIONS
2431//
2432//=================================================================================================
2433
2434//*************************************************************************************************
2436template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2437struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 0UL >
2438 : public Size<MT1,0UL>
2439{};
2440
2441template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2442struct Size< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, 1UL >
2443 : public Size<MT2,1UL>
2444{};
2446//*************************************************************************************************
2447
2448
2449
2450
2451//=================================================================================================
2452//
2453// ISALIGNED SPECIALIZATIONS
2454//
2455//=================================================================================================
2456
2457//*************************************************************************************************
2459template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
2460struct IsAligned< SMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
2461 : public IsAligned<MT2>
2462{};
2464//*************************************************************************************************
2465
2466} // namespace blaze
2467
2468#endif
Header file for auxiliary alias declarations.
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.
Definition: Aliases.h:110
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions.
Definition: Aliases.h:450
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions.
Definition: Aliases.h:190
typename T::OppositeType OppositeType_t
Alias declaration for nested OppositeType type definitions.
Definition: Aliases.h:310
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions.
Definition: Aliases.h:550
Header file for run time assertion macros.
Header file for the blaze::checked and blaze::unchecked instances.
Constraints on the storage order of matrix types.
Header file for the conjugate shim.
Header file for the decldiag trait.
Header file for the DeclDiag functor.
Header file for the declherm trait.
Header file for the DeclHerm functor.
Header file for the decllow trait.
Header file for the DeclLow functor.
Header file for the declsym trait.
Header file for the DeclSym functor.
Header file for the declupp trait.
Header file for the DeclUpp functor.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasSIMDAdd type trait.
Header file for the HasSIMDMult type trait.
Header file for the If class template.
Header file for the IntegralConstant class template.
Header file for the IsAligned type trait.
Header file for the IsBuiltin type trait.
Header file for the IsComputation type trait class.
Header file for the IsDiagonal type trait.
Header file for the IsExpression type trait class.
Header file for the IsIdentity type trait.
Header file for the IsLower type trait.
Header file for the IsPadded type trait.
Header file for the IsResizable type trait.
Header file for the IsRowMajorMatrix type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsStrictlyLower type trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsTriangular type trait.
Header file for the IsUpper type trait.
Header file for the MAYBE_UNUSED function template.
Header file for the multiplication trait.
Header file for the Noop functor.
Header file for the prevMultiple shim.
Constraints on the storage order of matrix types.
Header file for all SIMD functionality.
Constraint on the data type.
Base class for dense matrices.
Definition: DenseMatrix.h:82
SIMD characteristics of data types.
Definition: SIMDTrait.h:297
Expression object for sparse matrix-dense matrix multiplications.
Definition: SMatDMatMultExpr.h:132
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:412
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: SMatDMatMultExpr.h:456
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:464
OppositeType_t< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: SMatDMatMultExpr.h:258
ResultType_t< MT1 > RT1
Result type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:135
static constexpr bool evaluateRight
Compilation switch for the composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:150
CompositeType_t< MT1 > CT1
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:139
static constexpr bool evaluateLeft
Compilation switch for the composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:145
SMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the SMatDMatMultExpr class.
Definition: SMatDMatMultExpr.h:302
static constexpr bool SYM
Flag for symmetric matrices.
Definition: SMatDMatMultExpr.h:154
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: SMatDMatMultExpr.h:382
static constexpr bool LOW
Flag for lower matrices.
Definition: SMatDMatMultExpr.h:156
If_t< IsExpression_v< MT2 >, const MT2, const MT2 & > RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:269
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: SMatDMatMultExpr.h:280
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: SMatDMatMultExpr.h:259
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: SMatDMatMultExpr.h:446
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: SMatDMatMultExpr.h:317
const ElementType ReturnType
Return type for expression template evaluations.
Definition: SMatDMatMultExpr.h:262
If_t< IsExpression_v< MT1 >, const MT1, const MT1 & > LeftOperand
Composite type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:266
static constexpr bool HERM
Flag for Hermitian matrices.
Definition: SMatDMatMultExpr.h:155
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: SMatDMatMultExpr.h:261
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: SMatDMatMultExpr.h:424
const ResultType CompositeType
Data type for composite expression templates.
Definition: SMatDMatMultExpr.h:263
LeftOperand leftOperand() const noexcept
Returns the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:402
CompositeType_t< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:140
LeftOperand lhs_
Left-hand side sparse matrix of the multiplication expression.
Definition: SMatDMatMultExpr.h:463
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: SMatDMatMultExpr.h:366
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: SMatDMatMultExpr.h:436
static constexpr bool UPP
Flag for upper matrices.
Definition: SMatDMatMultExpr.h:157
ResultType_t< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:136
If_t< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side sparse matrix operand.
Definition: SMatDMatMultExpr.h:272
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: SMatDMatMultExpr.h:392
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: SMatDMatMultExpr.h:293
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: SMatDMatMultExpr.h:260
typename If_t< HERM, DeclHermTrait< MultTrait_t< RT1, RT2 > >, If_t< SYM, DeclSymTrait< MultTrait_t< RT1, RT2 > >, If_t< LOW, If_t< UPP, DeclDiagTrait< MultTrait_t< RT1, RT2 > >, DeclLowTrait< MultTrait_t< RT1, RT2 > > >, If_t< UPP, DeclUppTrait< MultTrait_t< RT1, RT2 > >, MultTrait< RT1, RT2 > > > > >::Type ResultType
Result type for expression template evaluations.
Definition: SMatDMatMultExpr.h:256
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: SMatDMatMultExpr.h:287
ElementType_t< RT1 > ET1
Element type of the left-hand side sparse matrix expression.
Definition: SMatDMatMultExpr.h:137
If_t< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: SMatDMatMultExpr.h:275
ElementType_t< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: SMatDMatMultExpr.h:138
Base class for sparse matrices.
Definition: SparseMatrix.h:77
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Header file for the Computation base class.
Header file for the DenseMatrix base class.
Header file for the MatMatMultExpr base class.
decltype(auto) column(Matrix< MT, SO > &matrix, RCAs... args)
Creating a view on a specific column of the given matrix.
Definition: Column.h:137
decltype(auto) min(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise minimum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1339
decltype(auto) max(const DenseMatrix< MT1, SO1 > &lhs, const DenseMatrix< MT2, SO2 > &rhs)
Computes the componentwise maximum of the dense matrices lhs and rhs.
Definition: DMatDMatMapExpr.h:1375
decltype(auto) declstrupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as strictly upper.
Definition: DMatDeclStrUppExpr.h:1003
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1464
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:978
decltype(auto) declstrlow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as strictly lower.
Definition: DMatDeclStrLowExpr.h:1003
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:812
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1004
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1004
decltype(auto) decluniupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as uniupper.
Definition: DMatDeclUniUppExpr.h:1005
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1005
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1005
decltype(auto) declunilow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as unilower.
Definition: DMatDeclUniLowExpr.h:1004
bool isDefault(const DiagonalMatrix< MT, SO, DF > &m)
Returns whether the given diagonal matrix is in default state.
Definition: DiagonalMatrix.h:169
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.
Definition: RowMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.
Definition: StorageOrder.h:84
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.
Definition: RequiresEvaluation.h:81
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.
Definition: MatMatMultExpr.h:103
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type.
Definition: Zero.h:81
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.
Definition: DenseMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_MATRIX_TYPE(T)
Constraint on the data type.
Definition: SparseMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type.
Definition: Zero.h:61
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr void reset(Matrix< MT, SO > &matrix)
Resetting the given matrix.
Definition: Matrix.h:806
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:584
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:518
bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:1383
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v< T > &&HasSize_v< T, 1UL >, If_t< IsSigned_v< T >, SIMDint8, SIMDuint8 > > set(T value) noexcept
Sets all values in the vector to the given 1-byte integral value.
Definition: Set.h:75
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template.
Definition: SIMDTrait.h:315
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:194
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:158
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template.
Definition: EnableIf.h:138
constexpr void MAYBE_UNUSED(const Args &...)
Suppression of unused parameter warnings.
Definition: MaybeUnused.h:81
typename If< Condition >::template Type< T1, T2 > If_t
Auxiliary alias template for the If class template.
Definition: If.h:108
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.
Definition: Exception.h:331
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.
Definition: Exception.h:235
typename EnableIf<!Condition, T >::Type DisableIf_t
Auxiliary type for the EnableIf class template.
Definition: EnableIf.h:175
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
Header file for the exception macros of the math module.
Constraints on the storage order of matrix types.
Header file for all forward declarations for expression class templates.
Header file for the Size type trait.
Header file for the reset shim.
Header file for the serial shim.
Base class for all compute expression templates.
Definition: Computation.h:68
Base template for the DeclDiagTrait class.
Definition: DeclDiagTrait.h:127
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:61
Base template for the DeclHermTrait class.
Definition: DeclHermTrait.h:126
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:61
Base template for the DeclLowTrait class.
Definition: DeclLowTrait.h:126
Generic wrapper for the decllow() function.
Definition: DeclLow.h:61
Base template for the DeclSymTrait class.
Definition: DeclSymTrait.h:126
Generic wrapper for the declsym() function.
Definition: DeclSym.h:61
Base template for the DeclUppTrait class.
Definition: DeclUppTrait.h:126
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:61
Base class for all matrix/matrix multiplication expression templates.
Definition: MatMatMultExpr.h:71
Base template for the MultTrait class.
Definition: MultTrait.h:130
Generic wrapper for the null function.
Definition: Noop.h:62
System settings for performance optimizations.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
Header file for the IsZero type trait.
Header file for the RequiresEvaluation type trait.
Header file for basic type definitions.
Header file for the generic max algorithm.
Header file for the generic min algorithm.