Blaze 3.9
TDMatSVecMultExpr.h
Go to the documentation of this file.
1//=================================================================================================
33//=================================================================================================
34
35#ifndef _BLAZE_MATH_EXPRESSIONS_TDMATSVECMULTEXPR_H_
36#define _BLAZE_MATH_EXPRESSIONS_TDMATSVECMULTEXPR_H_
37
38
39//*************************************************************************************************
40// Includes
41//*************************************************************************************************
42
43#include <blaze/math/Aliases.h>
62#include <blaze/math/SIMD.h>
83#include <blaze/util/Assert.h>
84#include <blaze/util/EnableIf.h>
87#include <blaze/util/mpl/If.h>
88#include <blaze/util/Types.h>
89
90
91namespace blaze {
92
93//=================================================================================================
94//
95// CLASS TDMATSVECMULTEXPR
96//
97//=================================================================================================
98
99//*************************************************************************************************
106template< typename MT // Type of the left-hand side dense matrix
107 , typename VT > // Type of the right-hand side sparse vector
109 : public MatVecMultExpr< DenseVector< TDMatSVecMultExpr<MT,VT>, false > >
110 , private Computation
111{
112 private:
113 //**Type definitions****************************************************************************
120 //**********************************************************************************************
121
122 //**********************************************************************************************
124 static constexpr bool evaluateMatrix = RequiresEvaluation_v<MT>;
125 //**********************************************************************************************
126
127 //**********************************************************************************************
129 static constexpr bool evaluateVector = ( IsComputation_v<VT> || RequiresEvaluation_v<VT> );
130 //**********************************************************************************************
131
132 //**********************************************************************************************
134
138 template< typename T1 >
139 static constexpr bool UseSMPAssign_v = ( evaluateMatrix || evaluateVector );
141 //**********************************************************************************************
142
143 //**********************************************************************************************
145
149 template< typename T1, typename T2, typename T3 >
150 static constexpr bool UseVectorizedKernel_v =
151 ( useOptimizedKernels &&
152 !IsDiagonal_v<T2> &&
153 T1::simdEnabled && T2::simdEnabled &&
154 IsSIMDCombinable_v< ElementType_t<T1>
156 , ElementType_t<T3> > &&
157 HasSIMDAdd_v< ElementType_t<T2>, ElementType_t<T3> > &&
158 HasSIMDMult_v< ElementType_t<T2>, ElementType_t<T3> > );
160 //**********************************************************************************************
161
162 //**********************************************************************************************
164
168 template< typename T1, typename T2, typename T3 >
169 static constexpr bool UseOptimizedKernel_v =
170 ( !UseVectorizedKernel_v<T1,T2,T3> &&
171 !IsDiagonal_v<T2> &&
172 !IsResizable_v< ElementType_t<T1> > &&
173 !IsResizable_v<VET> );
175 //**********************************************************************************************
176
177 //**********************************************************************************************
179
182 template< typename T1, typename T2, typename T3 >
183 static constexpr bool UseDefaultKernel_v =
184 ( !UseVectorizedKernel_v<T1,T2,T3> && !UseOptimizedKernel_v<T1,T2,T3> );
186 //**********************************************************************************************
187
188 public:
189 //**Type definitions****************************************************************************
192
195
// Return type of the element access functions operator[]() and at().
200 using ReturnType = const ElementType;
// Type used when this expression appears as an operand of another expression.
201 using CompositeType = const ResultType;
202
// Type of the stored matrix operand, chosen via If_t on IsExpression_v<MT>
// (presumably: by value for expressions, by const reference otherwise -- confirm
// against If_t).
204 using LeftOperand = If_t< IsExpression_v<MT>, const MT, const MT& >;
205
// Type of the stored vector operand; same selection scheme as LeftOperand,
// applied to VT.
207 using RightOperand = If_t< IsExpression_v<VT>, const VT, const VT& >;
208
211
214 //**********************************************************************************************
215
216 //**Compilation flags***************************************************************************
218 static constexpr bool simdEnabled =
219 ( !IsDiagonal_v<MT> &&
220 MT::simdEnabled &&
221 HasSIMDAdd_v<MET,VET> &&
222 HasSIMDMult_v<MET,VET> );
223
225 static constexpr bool smpAssignable =
226 ( !evaluateMatrix && MT::smpAssignable && !evaluateVector && VT::smpAssignable );
227 //**********************************************************************************************
228
229 //**SIMD properties*****************************************************************************
231 static constexpr size_t SIMDSIZE = SIMDTrait<ElementType>::size;
232 //**********************************************************************************************
233
234 //**Constructor*********************************************************************************
240 inline TDMatSVecMultExpr( const MT& mat, const VT& vec ) noexcept
241 : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
242 , vec_( vec ) // Right-hand side sparse vector of the multiplication expression
243 {
244 BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
245 }
246 //**********************************************************************************************
247
248 //**Subscript operator**************************************************************************
254 inline ReturnType operator[]( size_t index ) const {
255 BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
256
257 if( IsDiagonal_v<MT> )
258 {
259 return mat_(index,index) * vec_[index];
260 }
261 else if( IsLower_v<MT> )
262 {
263 const size_t n( IsStrictlyLower_v<MT> ? index : index+1UL );
264 return subvector( row( mat_, index, unchecked ), 0UL, n, unchecked ) *
265 subvector( vec_, 0UL, n, unchecked );
266 }
267 else if( IsUpper_v<MT> )
268 {
269 const size_t begin( IsStrictlyUpper_v<MT> ? index+1UL : index );
270 const size_t n ( mat_.columns() - begin );
271 return subvector( row( mat_, index, unchecked ), begin, n, unchecked ) *
273 }
274 else
275 {
276 return row( mat_, index, unchecked ) * vec_;
277 }
278 }
279 //**********************************************************************************************
280
281 //**At function*********************************************************************************
288 inline ReturnType at( size_t index ) const {
289 if( index >= mat_.rows() ) {
290 BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
291 }
292 return (*this)[index];
293 }
294 //**********************************************************************************************
295
296 //**Size function*******************************************************************************
// Returns the size of the resulting vector, which equals the number of rows of
// the matrix operand.
301 inline size_t size() const noexcept {
302 return mat_.rows();
303 }
304 //**********************************************************************************************
305
306 //**Left operand access*************************************************************************
// Returns the left-hand side dense matrix operand (mat_).
311 inline LeftOperand leftOperand() const noexcept {
312 return mat_;
313 }
314 //**********************************************************************************************
315
316 //**Right operand access************************************************************************
// Returns the right-hand side sparse vector operand (vec_).
321 inline RightOperand rightOperand() const noexcept {
322 return vec_;
323 }
324 //**********************************************************************************************
325
326 //**********************************************************************************************
332 template< typename T >
333 inline bool canAlias( const T* alias ) const noexcept {
334 return mat_.isAliased( alias ) || vec_.isAliased( alias );
335 }
336 //**********************************************************************************************
337
338 //**********************************************************************************************
344 template< typename T >
345 inline bool isAliased( const T* alias ) const noexcept {
346 return mat_.isAliased( alias ) || vec_.isAliased( alias );
347 }
348 //**********************************************************************************************
349
350 //**********************************************************************************************
// Reports whether the operands are properly aligned in memory. Only the dense
// matrix operand is queried; the sparse vector operand does not take part.
355 inline bool isAligned() const noexcept {
356 return mat_.isAligned();
357 }
358 //**********************************************************************************************
359
360 //**********************************************************************************************
365 inline bool canSMPAssign() const noexcept {
366 return ( size() > SMP_TDMATSVECMULT_THRESHOLD );
367 }
368 //**********************************************************************************************
369
370 private:
371 //**Member variables****************************************************************************
374 //**********************************************************************************************
375
376 //**Assignment to dense vectors*****************************************************************
389 template< typename VT1 > // Type of the target dense vector
390 friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
391 {
393
394 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
395
396 // Evaluation of the right-hand side sparse vector operand
397 RT x( serial( rhs.vec_ ) );
398 if( x.nonZeros() == 0UL ) {
399 reset( *lhs );
400 return;
401 }
402
403 // Evaluation of the left-hand side dense matrix operand
404 LT A( serial( rhs.mat_ ) );
405
406 // Checking the evaluated operands
407 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
408 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
409 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
410 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
411
412 // Performing the dense matrix-sparse vector multiplication
413 TDMatSVecMultExpr::selectAssignKernel( *lhs, A, x );
414 }
416 //**********************************************************************************************
417
418 //**Default assignment to dense vectors*********************************************************
432 template< typename VT1 // Type of the left-hand side target vector
433 , typename MT1 // Type of the left-hand side matrix operand
434 , typename VT2 > // Type of the right-hand side vector operand
435 static inline auto selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
437 {
438 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
439
440 const size_t M( A.rows() );
441
442 auto element( x.begin() );
443 const auto end( x.end() );
444
445 size_t last( 0UL );
446
447 if( IsLower_v<MT1> ) {
448 const size_t iend( IsStrictlyLower_v<MT1> ? element->index()+1UL : element->index() );
449 for( size_t i=0UL; i<iend; ++i )
450 reset( y[i] );
451 }
452
453 for( ; element!=end; ++element )
454 {
455 const size_t index( element->index() );
456
457 if( IsDiagonal_v<MT1> )
458 {
459 for( size_t i=last; i<index; ++i )
460 reset( y[i] );
461
462 y[index] = A(index,index) * element->value();
463 last = index + 1UL;
464 }
465 else
466 {
467 const size_t ibegin( ( IsLower_v<MT1> )
468 ?( IsStrictlyLower_v<MT1> ? index+1UL : index )
469 :( 0UL ) );
470 const size_t iend( ( IsUpper_v<MT1> )
471 ?( IsStrictlyUpper_v<MT1> ? index : index+1UL )
472 :( M ) );
473 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
474
475 for( size_t i=ibegin; i<last; ++i ) {
476 y[i] += A(i,index) * element->value();
477 }
478 for( size_t i=last; i<iend; ++i ) {
479 y[i] = A(i,index) * element->value();
480 }
481
482 last = iend;
483 }
484 }
485
486 if( IsUpper_v<MT1> ) {
487 for( size_t i=last; i<M; ++i )
488 reset( y[i] );
489 }
490 }
492 //**********************************************************************************************
493
494 //**Optimized assignment to dense vectors*******************************************************
// Optimized assignment kernel for y = A * x, enabled when UseOptimizedKernel_v
// holds. The non-zero elements of x are processed in groups of four columns.
// The first group (or the first single element) is ASSIGNED to all rows of y,
// everything after that is ACCUMULATED with +=.
508 template< typename VT1 // Type of the left-hand side target vector
509 , typename MT1 // Type of the left-hand side matrix operand
510 , typename VT2 > // Type of the right-hand side vector operand
511 static inline auto selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
512 -> EnableIf_t< UseOptimizedKernel_v<VT1,MT1,VT2> >
513 {
514 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
515
516 const size_t M( A.rows() );
517
518 auto element( x.begin() );
519 const auto end( x.end() );
520
// Number of non-zero elements that can be handled in groups of four
// (prevMultiple presumably rounds down to a multiple of 4 -- confirm).
521 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
522 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
523
// Initialization step: at least four non-zero elements -> the first group of
// four columns initializes every row of y.
524 if( jpos > 3UL )
525 {
526 const size_t j1( element->index() );
527 const VET v1( element->value() );
528 ++element;
529 const size_t j2( element->index() );
530 const VET v2( element->value() );
531 ++element;
532 const size_t j3( element->index() );
533 const VET v3( element->value() );
534 ++element;
535 const size_t j4( element->index() );
536 const VET v4( element->value() );
537 ++element;
538
539 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
540
541 for( size_t i=0UL; i<M; ++i ) {
542 y[i] = A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
543 }
544 }
// Fewer than four non-zero elements: the first single column initializes y.
545 else
546 {
547 const size_t j1( element->index() );
548 const VET v1( element->value() );
549 ++element;
550
551 for( size_t i=0UL; i<M; ++i ) {
552 y[i] = A(i,j1) * v1;
553 }
554 }
555
// Remaining complete groups of four. The start value of j accounts for the
// elements consumed above; when jpos <= 3 the condition is false immediately.
556 for( size_t j=(jpos>3UL)?(4UL):(1UL); (j+4UL)<=jpos; j+=4UL )
557 {
558 const size_t j1( element->index() );
559 const VET v1( element->value() );
560 ++element;
561 const size_t j2( element->index() );
562 const VET v2( element->value() );
563 ++element;
564 const size_t j3( element->index() );
565 const VET v3( element->value() );
566 ++element;
567 const size_t j4( element->index() );
568 const VET v4( element->value() );
569 ++element;
570
571 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
572
// Row range restricted by the matrix structure: lower matrices start at the
// smallest column index j1, upper matrices end at the largest column index j4.
573 const size_t ibegin( ( IsLower_v<MT1> )
574 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
575 :( 0UL ) );
576 const size_t iend( ( IsUpper_v<MT1> )
577 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
578 :( M ) );
579 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
580
581 for( size_t i=ibegin; i<iend; ++i ) {
582 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
583 }
584 }
// Leftover non-zero elements are accumulated one column at a time.
585 for( ; element!=end; ++element )
586 {
587 const size_t j1( element->index() );
588 const VET v1( element->value() );
589
590 const size_t ibegin( ( IsLower_v<MT1> )
591 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
592 :( 0UL ) );
593 const size_t iend( ( IsUpper_v<MT1> )
594 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
595 :( M ) );
596 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
597
598 for( size_t i=ibegin; i<iend; ++i ) {
599 y[i] += A(i,j1) * v1;
600 }
601 }
602 }
604 //**********************************************************************************************
605
606 //**Vectorized assignment to dense vectors******************************************************
// Vectorized assignment kernel for y = A * x, enabled when UseVectorizedKernel_v
// holds. Same column grouping as the optimized kernel (first group assigns, the
// rest accumulates), but the rows are processed SIMDSIZE elements at a time via
// load()/store(), followed by a scalar loop for the leftover rows.
620 template< typename VT1 // Type of the left-hand side target vector
621 , typename MT1 // Type of the left-hand side matrix operand
622 , typename VT2 > // Type of the right-hand side vector operand
623 static inline auto selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
624 -> EnableIf_t< UseVectorizedKernel_v<VT1,MT1,VT2> >
625 {
626 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
627
// A scalar remainder loop is needed unless both the matrix and the target
// vector types are padded.
628 constexpr bool remainder( !IsPadded_v<MT1> || !IsPadded_v<VT1> );
629
630 const size_t M( A.rows() );
631
632 auto element( x.begin() );
633 const auto end( x.end() );
634
// Number of non-zero elements that can be handled in groups of four
// (prevMultiple presumably rounds down to a multiple of 4 -- confirm).
635 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
636 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
637
// Initialization step with the first group of four columns: all rows are assigned.
638 if( jpos > 3UL )
639 {
640 const size_t j1( element->index() );
641 const VET v1( element->value() );
642 ++element;
643 const size_t j2( element->index() );
644 const VET v2( element->value() );
645 ++element;
646 const size_t j3( element->index() );
647 const VET v3( element->value() );
648 ++element;
649 const size_t j4( element->index() );
650 const VET v4( element->value() );
651 ++element;
652
653 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
654
// SIMD values built from the four vector elements (set() presumably
// broadcasts the scalar to all lanes -- confirm).
655 const SIMDType xmm1( set( v1 ) );
656 const SIMDType xmm2( set( v2 ) );
657 const SIMDType xmm3( set( v3 ) );
658 const SIMDType xmm4( set( v4 ) );
659
// End of the SIMD loop; with a remainder loop it is cut back via prevMultiple.
660 const size_t ipos( remainder ? prevMultiple( M, SIMDSIZE ) : M );
661 BLAZE_INTERNAL_ASSERT( ipos <= M, "Invalid end calculation" );
662
663 size_t i( 0UL );
664
665 for( ; i<ipos; i+=SIMDSIZE ) {
666 y.store( i, A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
667 }
668 for( ; remainder && i<M; ++i ) {
669 y[i] = A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
670 }
671 }
// Fewer than four non-zero elements: the first single column initializes y.
672 else
673 {
674 const size_t j1( element->index() );
675 const VET v1( element->value() );
676 ++element;
677
678 const SIMDType xmm1( set( v1 ) );
679
680 const size_t ipos( remainder ? prevMultiple( M, SIMDSIZE ) : M );
681 BLAZE_INTERNAL_ASSERT( ipos <= M, "Invalid end calculation" );
682
683 size_t i( 0UL );
684
685 for( ; i<ipos; i+=SIMDSIZE ) {
686 y.store( i, A.load(i,j1) * xmm1 );
687 }
688 for( ; remainder && i<M; ++i ) {
689 y[i] = A(i,j1) * v1;
690 }
691 }
692
// Remaining complete groups of four columns are accumulated onto y.
693 for( size_t j=(jpos>3UL)?(4UL):(1UL); (j+4UL)<=jpos; j+=4UL )
694 {
695 const size_t j1( element->index() );
696 const VET v1( element->value() );
697 ++element;
698 const size_t j2( element->index() );
699 const VET v2( element->value() );
700 ++element;
701 const size_t j3( element->index() );
702 const VET v3( element->value() );
703 ++element;
704 const size_t j4( element->index() );
705 const VET v4( element->value() );
706 ++element;
707
708 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
709
710 const SIMDType xmm1( set( v1 ) );
711 const SIMDType xmm2( set( v2 ) );
712 const SIMDType xmm3( set( v3 ) );
713 const SIMDType xmm4( set( v4 ) );
714
// For lower matrices the start row is passed through prevMultiple with SIMDSIZE
// before it is used as the first index of the SIMD loop.
715 const size_t ibegin( ( IsLower_v<MT1> )
716 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
717 :( 0UL ) );
718 const size_t iend( ( IsUpper_v<MT1> )
719 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
720 :( M ) );
721 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
722
723 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
724 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
725
726 size_t i( ibegin );
727
728 for( ; i<ipos; i+=SIMDSIZE ) {
729 y.store( i, y.load(i) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
730 }
731 for( ; remainder && i<iend; ++i ) {
732 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
733 }
734 }
735
// Leftover non-zero elements are accumulated one column at a time.
736 for( ; element!=end; ++element )
737 {
738 const size_t j1( element->index() );
739 const VET v1( element->value() );
740
741 const SIMDType xmm1( set( v1 ) );
742
743 const size_t ibegin( ( IsLower_v<MT1> )
744 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
745 :( 0UL ) );
746 const size_t iend( ( IsUpper_v<MT1> )
747 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
748 :( M ) );
749 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
750
751 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
752 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
753
754 size_t i( ibegin );
755
756 for( ; i<ipos; i+=SIMDSIZE ) {
757 y.store( i, y.load(i) + A.load(i,j1) * xmm1 );
758 }
759 for( ; remainder && i<iend; ++i ) {
760 y[i] += A(i,j1) * v1;
761 }
762 }
763 }
765 //**********************************************************************************************
766
767 //**Assignment to sparse vectors****************************************************************
// Assignment of the matrix/vector product to a sparse vector.
// The expression is first evaluated serially into a temporary of type ResultType,
// which is then assigned to the target vector.
780 template< typename VT1 > // Type of the target sparse vector
781 friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
782 {
784
788
789 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
790
// Serial evaluation of the complete expression into a temporary
791 const ResultType tmp( serial( rhs ) );
792 assign( *lhs, tmp );
793 }
795 //**********************************************************************************************
796
797 //**Addition assignment to dense vectors********************************************************
810 template< typename VT1 > // Type of the target dense vector
811 friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
812 {
814
815 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
816
817 // Evaluation of the right-hand side sparse vector operand
818 RT x( serial( rhs.vec_ ) );
819 if( x.nonZeros() == 0UL ) return;
820
821 // Evaluation of the left-hand side dense matrix operand
822 LT A( serial( rhs.mat_ ) );
823
824 // Checking the evaluated operands
825 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
826 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
827 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
828 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
829
830 // Performing the dense matrix-sparse vector multiplication
831 TDMatSVecMultExpr::selectAddAssignKernel( *lhs, A, x );
832 }
834 //**********************************************************************************************
835
836 //**Default addition assignment to dense vectors************************************************
850 template< typename VT1 // Type of the left-hand side target vector
851 , typename MT1 // Type of the left-hand side matrix operand
852 , typename VT2 > // Type of the right-hand side vector operand
853 static inline auto selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
854 -> EnableIf_t< UseDefaultKernel_v<VT1,MT1,VT2> >
855 {
856 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
857
858 const size_t M( A.rows() );
859
860 auto element( x.begin() );
861 const auto end( x.end() );
862
863 for( ; element!=end; ++element )
864 {
865 const size_t index( element->index() );
866
867 if( IsDiagonal_v<MT1> )
868 {
869 y[index] += A(index,index) * element->value();
870 }
871 else
872 {
873 const size_t ibegin( ( IsLower_v<MT1> )
874 ?( IsStrictlyLower_v<MT1> ? index+1UL : index )
875 :( 0UL ) );
876 const size_t iend( ( IsUpper_v<MT1> )
877 ?( IsStrictlyUpper_v<MT1> ? index : index+1UL )
878 :( M ) );
879 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
880
881 for( size_t i=ibegin; i<iend; ++i ) {
882 y[i] += A(i,index) * element->value();
883 }
884 }
885 }
886 }
888 //**********************************************************************************************
889
890 //**Optimized addition assignment to dense vectors**********************************************
// Optimized addition assignment kernel for y += A * x, enabled when
// UseOptimizedKernel_v holds. The non-zero elements of x are processed in groups
// of four columns, followed by a one-column loop for the leftover elements.
904 template< typename VT1 // Type of the left-hand side target vector
905 , typename MT1 // Type of the left-hand side matrix operand
906 , typename VT2 > // Type of the right-hand side vector operand
907 static inline auto selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
908 -> EnableIf_t< UseOptimizedKernel_v<VT1,MT1,VT2> >
909 {
910 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
911
912 const size_t M( A.rows() );
913
914 auto element( x.begin() );
915 const auto end( x.end() );
916
// Number of non-zero elements that can be handled in groups of four
// (prevMultiple presumably rounds down to a multiple of 4 -- confirm).
917 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
918 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
919
// Complete groups of four non-zero elements; 'element' is advanced four times
// per iteration.
920 for( size_t j=0UL; (j+4UL)<=jpos; j+=4UL )
921 {
922 const size_t j1( element->index() );
923 const VET v1( element->value() );
924 ++element;
925 const size_t j2( element->index() );
926 const VET v2( element->value() );
927 ++element;
928 const size_t j3( element->index() );
929 const VET v3( element->value() );
930 ++element;
931 const size_t j4( element->index() );
932 const VET v4( element->value() );
933 ++element;
934
935 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
936
// Row range restricted by the matrix structure: lower matrices start at the
// smallest column index j1, upper matrices end at the largest column index j4.
937 const size_t ibegin( ( IsLower_v<MT1> )
938 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
939 :( 0UL ) );
940 const size_t iend( ( IsUpper_v<MT1> )
941 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
942 :( M ) );
943 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
944
945 for( size_t i=ibegin; i<iend; ++i ) {
946 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
947 }
948 }
// Leftover non-zero elements are accumulated one column at a time.
949 for( ; element!=end; ++element )
950 {
951 const size_t j1( element->index() );
952 const VET v1( element->value() );
953
954 const size_t ibegin( ( IsLower_v<MT1> )
955 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
956 :( 0UL ) );
957 const size_t iend( ( IsUpper_v<MT1> )
958 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
959 :( M ) );
960 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
961
962 for( size_t i=ibegin; i<iend; ++i ) {
963 y[i] += A(i,j1) * v1;
964 }
965 }
966 }
968 //**********************************************************************************************
969
970 //**Vectorized addition assignment to dense vectors*********************************************
// Vectorized addition assignment kernel for y += A * x, enabled when
// UseVectorizedKernel_v holds. Columns are processed in groups of four and the
// rows SIMDSIZE elements at a time via load()/store(), followed by a scalar loop
// for the leftover rows.
984 template< typename VT1 // Type of the left-hand side target vector
985 , typename MT1 // Type of the left-hand side matrix operand
986 , typename VT2 > // Type of the right-hand side vector operand
987 static inline auto selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
988 -> EnableIf_t< UseVectorizedKernel_v<VT1,MT1,VT2> >
989 {
990 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
991
// A scalar remainder loop is needed unless both the matrix and the target
// vector types are padded.
992 constexpr bool remainder( !IsPadded_v<MT1> || !IsPadded_v<VT1> );
993
994 const size_t M( A.rows() );
995
996 auto element( x.begin() );
997 const auto end( x.end() );
998
// Number of non-zero elements that can be handled in groups of four
// (prevMultiple presumably rounds down to a multiple of 4 -- confirm).
999 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
1000 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
1001
// Complete groups of four non-zero elements.
1002 for( size_t j=0UL; (j+4UL)<=jpos; j+=4UL )
1003 {
1004 const size_t j1( element->index() );
1005 const VET v1( element->value() );
1006 ++element;
1007 const size_t j2( element->index() );
1008 const VET v2( element->value() );
1009 ++element;
1010 const size_t j3( element->index() );
1011 const VET v3( element->value() );
1012 ++element;
1013 const size_t j4( element->index() );
1014 const VET v4( element->value() );
1015 ++element;
1016
1017 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
1018
// SIMD values built from the four vector elements (set() presumably
// broadcasts the scalar to all lanes -- confirm).
1019 const SIMDType xmm1( set( v1 ) );
1020 const SIMDType xmm2( set( v2 ) );
1021 const SIMDType xmm3( set( v3 ) );
1022 const SIMDType xmm4( set( v4 ) );
1023
// For lower matrices the start row is passed through prevMultiple with SIMDSIZE
// before it is used as the first index of the SIMD loop.
1024 const size_t ibegin( ( IsLower_v<MT1> )
1025 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
1026 :( 0UL ) );
1027 const size_t iend( ( IsUpper_v<MT1> )
1028 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
1029 :( M ) );
1030 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1031
// End of the SIMD loop; with a remainder loop it is cut back via prevMultiple.
1032 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
1033 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
1034
1035 size_t i( ibegin );
1036
1037 for( ; i<ipos; i+=SIMDSIZE ) {
1038 y.store( i, y.load(i) + A.load(i,j1) * xmm1 + A.load(i,j2) * xmm2 + A.load(i,j3) * xmm3 + A.load(i,j4) * xmm4 );
1039 }
1040 for( ; remainder && i<iend; ++i ) {
1041 y[i] += A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1042 }
1043 }
// Leftover non-zero elements are accumulated one column at a time.
1044 for( ; element!=end; ++element )
1045 {
1046 const size_t j1( element->index() );
1047 const VET v1( element->value() );
1048
1049 const SIMDType xmm1( set( v1 ) );
1050
1051 const size_t ibegin( ( IsLower_v<MT1> )
1052 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
1053 :( 0UL ) );
1054 const size_t iend( ( IsUpper_v<MT1> )
1055 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
1056 :( M ) );
1057 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1058
1059 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
1060 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
1061
1062 size_t i( ibegin );
1063
1064 for( ; i<ipos; i+=SIMDSIZE ) {
1065 y.store( i, y.load(i) + A.load(i,j1) * xmm1 );
1066 }
1067 for( ; remainder && i<iend; ++i ) {
1068 y[i] += A(i,j1) * v1;
1069 }
1070 }
1071 }
1073 //**********************************************************************************************
1074
1075 //**Addition assignment to sparse vectors*******************************************************
1076 // No special implementation for the addition assignment to sparse vectors.
1077 //**********************************************************************************************
1078
1079 //**Subtraction assignment to dense vectors*****************************************************
1092 template< typename VT1 > // Type of the target dense vector
1093 friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1094 {
1096
1097 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1098
1099 // Evaluation of the right-hand side sparse vector operand
1100 RT x( serial( rhs.vec_ ) );
1101 if( x.nonZeros() == 0UL ) return;
1102
1103 // Evaluation of the left-hand side dense matrix operand
1104 LT A( serial( rhs.mat_ ) );
1105
1106 // Checking the evaluated operands
1107 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1108 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1109 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1110 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
1111
1112 // Performing the dense matrix-sparse vector multiplication
1113 TDMatSVecMultExpr::selectSubAssignKernel( *lhs, A, x );
1114 }
1116 //**********************************************************************************************
1117
1118 //**Default subtraction assignment to dense vectors*********************************************
/*!\brief Default kernel for the subtraction assignment of a transpose dense matrix-sparse vector
//        multiplication to a dense vector (\f$ \vec{y}-=A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side sparse vector operand; must contain at least one non-zero element.
// \return void
//
// This overload participates in overload resolution only if \c UseDefaultKernel_v<VT1,MT1,VT2>
// is \c true. The kernel walks over the non-zero elements of \a x one by one. For the element
// stored at position \c index it subtracts \c A(i,index) times the element value from \c y[i]
// for every row \c i inside a range that is narrowed according to the structure traits of
// \a MT1 (diagonal, lower, strictly lower, upper, strictly upper).
*/
1132 template< typename VT1 // Type of the left-hand side target vector
1133 , typename MT1 // Type of the left-hand side matrix operand
1134 , typename VT2 > // Type of the right-hand side vector operand
1135 static inline auto selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1136 -> EnableIf_t< UseDefaultKernel_v<VT1,MT1,VT2> >
1137 {
1138 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1139
1140 const size_t M( A.rows() );
1141
1142 auto element( x.begin() );
1143 const auto end( x.end() );
1144
     // One pass per non-zero element of x; 'index' selects the matrix column that is combined
     // with the element's value.
1145 for( ; element!=end; ++element )
1146 {
1147 const size_t index( element->index() );
1148
     // Diagonal matrix type: only the entry A(index,index) of that column is used, so a single
     // element of y is updated.
1149 if( IsDiagonal_v<MT1> )
1150 {
1151 y[index] -= A(index,index) * element->value();
1152 }
1153 else
1154 {
     // Row range [ibegin,iend) for column 'index': lower matrix types start at the diagonal
     // (one row below it if strictly lower), upper matrix types stop right after the diagonal
     // (at the diagonal if strictly upper); all other matrix types use the full range [0,M).
1155 const size_t ibegin( ( IsLower_v<MT1> )
1156 ?( IsStrictlyLower_v<MT1> ? index+1UL : index )
1157 :( 0UL ) );
1158 const size_t iend( ( IsUpper_v<MT1> )
1159 ?( IsStrictlyUpper_v<MT1> ? index : index+1UL )
1160 :( M ) );
1161 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1162
1163 for( size_t i=ibegin; i<iend; ++i ) {
1164 y[i] -= A(i,index) * element->value();
1165 }
1166 }
1167 }
1168 }
1170 //**********************************************************************************************
1171
1172 //**Optimized subtraction assignment to dense vectors*******************************************
/*!\brief Optimized kernel for the subtraction assignment of a transpose dense matrix-sparse
//        vector multiplication to a dense vector (\f$ \vec{y}-=A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side sparse vector operand; must contain at least one non-zero element.
// \return void
//
// This overload participates in overload resolution only if \c UseOptimizedKernel_v<VT1,MT1,VT2>
// is \c true. The non-zero elements of \a x are consumed in groups of four: the indices and
// values of four consecutive elements are read first, then a single loop over the rows subtracts
// the combined contribution of the four matrix columns from \a y. The non-zero elements that do
// not fill a complete group of four are processed one at a time by a second loop.
*/
1186 template< typename VT1 // Type of the left-hand side target vector
1187 , typename MT1 // Type of the left-hand side matrix operand
1188 , typename VT2 > // Type of the right-hand side vector operand
1189 static inline auto selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1190 -> EnableIf_t< UseOptimizedKernel_v<VT1,MT1,VT2> >
1191 {
1192 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1193
1194 const size_t M( A.rows() );
1195
1196 auto element( x.begin() );
1197 const auto end( x.end() );
1198
     // jpos is the number of non-zero elements rounded down to a multiple of four, i.e. the
     // number of elements that can be handled by the four-way unrolled loop.
1199 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
1200 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
1201
     // Unrolled loop: 'j' only counts the processed elements; the iterator 'element' is advanced
     // manually four times per iteration, so the order of the reads below must not be changed.
1202 for( size_t j=0UL; (j+4UL)<=jpos; j+=4UL )
1203 {
1204 const size_t j1( element->index() );
1205 const VET v1( element->value() );
1206 ++element;
1207 const size_t j2( element->index() );
1208 const VET v2( element->value() );
1209 ++element;
1210 const size_t j3( element->index() );
1211 const VET v3( element->value() );
1212 ++element;
1213 const size_t j4( element->index() );
1214 const VET v4( element->value() );
1215 ++element;
1216
1217 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
1218
     // A single row range is shared by all four columns: the lower bound is derived from the
     // smallest index j1 and the upper bound from the largest index j4.
     // NOTE(review): presumably the entries of the other columns inside this widened range that
     // lie outside their own structural range read as zero and hence do not contribute - confirm.
1219 const size_t ibegin( ( IsLower_v<MT1> )
1220 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
1221 :( 0UL ) );
1222 const size_t iend( ( IsUpper_v<MT1> )
1223 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
1224 :( M ) );
1225 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1226
1227 for( size_t i=ibegin; i<iend; ++i ) {
1228 y[i] -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1229 }
1230 }
     // Remainder loop: the non-zero elements left over by the unrolled loop are handled one
     // column at a time, with the row range derived from that single column index.
1231 for( ; element!=end; ++element )
1232 {
1233 const size_t j1( element->index() );
1234 const VET v1( element->value() );
1235
1236 const size_t ibegin( ( IsLower_v<MT1> )
1237 ?( IsStrictlyLower_v<MT1> ? j1+1UL : j1 )
1238 :( 0UL ) );
1239 const size_t iend( ( IsUpper_v<MT1> )
1240 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
1241 :( M ) );
1242 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1243
1244 for( size_t i=ibegin; i<iend; ++i ) {
1245 y[i] -= A(i,j1) * v1;
1246 }
1247 }
1248 }
1250 //**********************************************************************************************
1251
1252 //**Vectorized subtraction assignment to dense vectors******************************************
/*!\brief Vectorized kernel for the subtraction assignment of a transpose dense matrix-sparse
//        vector multiplication to a dense vector (\f$ \vec{y}-=A*\vec{x} \f$).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side sparse vector operand; must contain at least one non-zero element.
// \return void
//
// This overload participates in overload resolution only if
// \c UseVectorizedKernel_v<VT1,MT1,VT2> is \c true. As in the optimized kernel, the non-zero
// elements of \a x are consumed in groups of four with a single-element loop for the rest.
// Within each group the rows are processed \c SIMDSIZE at a time via \c load() / \c store() on
// \a y and \c load() on \a A; a scalar loop finishes the rows that the SIMD loop did not cover
// whenever \a MT1 or \a VT1 is not padded.
*/
1266 template< typename VT1 // Type of the left-hand side target vector
1267 , typename MT1 // Type of the left-hand side matrix operand
1268 , typename VT2 > // Type of the right-hand side vector operand
1269 static inline auto selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1270 -> EnableIf_t< UseVectorizedKernel_v<VT1,MT1,VT2> >
1271 {
1272 BLAZE_INTERNAL_ASSERT( x.nonZeros() != 0UL, "Invalid number of non-zero elements" );
1273
     // A scalar tail loop is needed unless both the matrix type and the target vector type are
     // padded.
1274 constexpr bool remainder( !IsPadded_v<MT1> || !IsPadded_v<VT1> );
1275
1276 const size_t M( A.rows() );
1277
1278 auto element( x.begin() );
1279 const auto end( x.end() );
1280
     // Number of non-zero elements that can be handled in complete groups of four.
1281 const size_t jpos( prevMultiple( x.nonZeros(), 4UL ) );
1282 BLAZE_INTERNAL_ASSERT( jpos <= x.nonZeros(), "Invalid end calculation" );
1283
     // Unrolled loop: 'j' only counts the processed elements; the iterator 'element' is advanced
     // manually four times per iteration, so the order of the reads below must not be changed.
1284 for( size_t j=0UL; (j+4UL)<=jpos; j+=4UL )
1285 {
1286 const size_t j1( element->index() );
1287 const VET v1( element->value() );
1288 ++element;
1289 const size_t j2( element->index() );
1290 const VET v2( element->value() );
1291 ++element;
1292 const size_t j3( element->index() );
1293 const VET v3( element->value() );
1294 ++element;
1295 const size_t j4( element->index() );
1296 const VET v4( element->value() );
1297 ++element;
1298
1299 BLAZE_INTERNAL_ASSERT( j1 < j2 && j2 < j3 && j3 < j4, "Invalid sparse vector index detected" );
1300
     // set() fills a SIMD vector with copies of the given scalar, so each column can be scaled
     // by its sparse vector value SIMDSIZE rows at a time.
1301 const SIMDType xmm1( set( v1 ) );
1302 const SIMDType xmm2( set( v2 ) );
1303 const SIMDType xmm3( set( v3 ) );
1304 const SIMDType xmm4( set( v4 ) );
1305
     // Shared row range for the four columns. For lower matrix types the first row (derived from
     // the smallest index j1) is additionally rounded down to a multiple of SIMDSIZE.
     // NOTE(review): presumably the rounding keeps the SIMD loop on SIMDSIZE-aligned row offsets
     // and relies on the additionally touched matrix entries being zero - confirm.
1306 const size_t ibegin( ( IsLower_v<MT1> )
1307 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
1308 :( 0UL ) );
1309 const size_t iend( ( IsUpper_v<MT1> )
1310 ?( IsStrictlyUpper_v<MT1> ? j4 : j4+1UL )
1311 :( M ) );
1312 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1313
     // End of the SIMD loop: with a remainder it is the last multiple of SIMDSIZE not exceeding
     // iend; without a remainder the SIMD loop runs in SIMDSIZE steps until iend is reached
     // (NOTE(review): the last step may then extend past iend, presumably into padding - confirm).
1314 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
1315 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
1316
1317 size_t i( ibegin );
1318
1319 for( ; i<ipos; i+=SIMDSIZE ) {
1320 y.store( i, y.load(i) - A.load(i,j1) * xmm1 - A.load(i,j2) * xmm2 - A.load(i,j3) * xmm3 - A.load(i,j4) * xmm4 );
1321 }
     // Scalar tail for the rows in [ipos,iend); skipped entirely when no remainder is required.
1322 for( ; remainder && i<iend; ++i ) {
1323 y[i] -= A(i,j1) * v1 + A(i,j2) * v2 + A(i,j3) * v3 + A(i,j4) * v4;
1324 }
1325 }
     // Remainder loop over the non-zero elements left over by the unrolled loop; same scheme as
     // above, but with a single column and a row range derived from that column index only.
1326 for( ; element!=end; ++element )
1327 {
1328 const size_t j1( element->index() );
1329 const VET v1( element->value() );
1330
1331 const SIMDType xmm1( set( v1 ) );
1332
1333 const size_t ibegin( ( IsLower_v<MT1> )
1334 ?( prevMultiple( ( IsStrictlyLower_v<MT1> ? j1+1UL : j1 ), SIMDSIZE ) )
1335 :( 0UL ) );
1336 const size_t iend( ( IsUpper_v<MT1> )
1337 ?( IsStrictlyUpper_v<MT1> ? j1 : j1+1UL )
1338 :( M ) );
1339 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1340
1341 const size_t ipos( remainder ? prevMultiple( iend, SIMDSIZE ) : iend );
1342 BLAZE_INTERNAL_ASSERT( ipos <= iend, "Invalid end calculation" );
1343
1344 size_t i( ibegin );
1345
1346 for( ; i<ipos; i+=SIMDSIZE ) {
1347 y.store( i, y.load(i) - A.load(i,j1) * xmm1 );
1348 }
1349 for( ; remainder && i<iend; ++i ) {
1350 y[i] -= A(i,j1) * v1;
1351 }
1352 }
1353 }
1355 //**********************************************************************************************
1356
1357 //**Subtraction assignment to sparse vectors****************************************************
1358 // No special implementation for the subtraction assignment to sparse vectors.
1359 //**********************************************************************************************
1360
1361 //**Multiplication assignment to dense vectors**************************************************
/*!\brief Multiplication assignment of a transpose dense matrix-sparse vector multiplication to
//        a dense vector (\f$ \vec{y}*=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be multiplied.
// \return void
//
// No dedicated kernel is used here: the expression \a rhs is first evaluated serially into a
// temporary of type \c ResultType, and the temporary is then forwarded to the \c multAssign()
// overload for that type. The sizes of \a lhs and \a rhs are expected to match (internal
// assertion).
*/
1374 template< typename VT1 > // Type of the target dense vector
1375 friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1376 {
1378
1382
1383 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1384
     // serial() forces the serial evaluation of the expression into the temporary.
1385 const ResultType tmp( serial( rhs ) );
1386 multAssign( *lhs, tmp );
1387 }
1389 //**********************************************************************************************
1390
1391 //**Multiplication assignment to sparse vectors*************************************************
1392 // No special implementation for the multiplication assignment to sparse vectors.
1393 //**********************************************************************************************
1394
1395 //**Division assignment to dense vectors********************************************************
/*!\brief Division assignment of a transpose dense matrix-sparse vector multiplication to a
//        dense vector (\f$ \vec{y}/=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression acting as divisor.
// \return void
//
// No dedicated kernel is used here: the expression \a rhs is first evaluated serially into a
// temporary of type \c ResultType, and the temporary is then forwarded to the \c divAssign()
// overload for that type. The sizes of \a lhs and \a rhs are expected to match (internal
// assertion).
*/
1408 template< typename VT1 > // Type of the target dense vector
1409 friend inline void divAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1410 {
1412
1416
1417 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1418
     // serial() forces the serial evaluation of the expression into the temporary.
1419 const ResultType tmp( serial( rhs ) );
1420 divAssign( *lhs, tmp );
1421 }
1423 //**********************************************************************************************
1424
1425 //**Division assignment to sparse vectors*******************************************************
1426 // No special implementation for the division assignment to sparse vectors.
1427 //**********************************************************************************************
1428
1429 //**SMP assignment to dense vectors*************************************************************
/*!\brief SMP assignment of a transpose dense matrix-sparse vector multiplication to a dense
//        vector (\f$ \vec{y}=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be assigned.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// Both operands of \a rhs are first bound to (or evaluated into) objects of type \c RT and
// \c LT, respectively; the product of these two objects is then handed to \c smpAssign().
// If the vector operand holds no non-zero elements, \a lhs is reset and the matrix operand is
// not evaluated at all.
*/
1444 template< typename VT1 > // Type of the target dense vector
1445 friend inline auto smpAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1446 -> EnableIf_t< UseSMPAssign_v<VT1> >
1447 {
1449
1450 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1451
1452 // Evaluation of the right-hand side sparse vector operand
1453 RT x( rhs.vec_ );
     // Early exit: without non-zero elements in x the target is simply reset.
1454 if( x.nonZeros() == 0UL ) {
1455 reset( *lhs );
1456 return;
1457 }
1458
1459 // Evaluation of the left-hand side dense matrix operand
1460 LT A( rhs.mat_ );
1461
1462 // Checking the evaluated operands
1463 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1464 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1465 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1466 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
1467
1468 // Performing the dense matrix-sparse vector multiplication
1469 smpAssign( *lhs, A * x );
1470 }
1472 //**********************************************************************************************
1473
1474 //**SMP assignment to sparse vectors************************************************************
/*!\brief SMP assignment of a transpose dense matrix-sparse vector multiplication to a sparse
//        vector (\f$ \vec{y}=A*\vec{x} \f$).
//
// \param lhs The target left-hand side sparse vector.
// \param rhs The right-hand side multiplication expression to be assigned.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// The expression \a rhs is evaluated into a temporary of type \c ResultType, which is then
// forwarded to the \c smpAssign() overload for that type. The sizes of \a lhs and \a rhs are
// expected to match (internal assertion).
*/
1489 template< typename VT1 > // Type of the target sparse vector
1490 friend inline auto smpAssign( SparseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1491 -> EnableIf_t< UseSMPAssign_v<VT1> >
1492 {
1494
1498
1499 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1500
     // Evaluate the expression into a temporary, then assign the temporary to the sparse target.
1501 const ResultType tmp( rhs );
1502 smpAssign( *lhs, tmp );
1503 }
1505 //**********************************************************************************************
1506
1507 //**SMP addition assignment to dense vectors****************************************************
/*!\brief SMP addition assignment of a transpose dense matrix-sparse vector multiplication to a
//        dense vector (\f$ \vec{y}+=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be added.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// Both operands of \a rhs are first bound to (or evaluated into) objects of type \c RT and
// \c LT, respectively; the product of these two objects is then handed to \c smpAddAssign().
// If the vector operand holds no non-zero elements, the function returns without touching
// \a lhs and without evaluating the matrix operand.
*/
1522 template< typename VT1 > // Type of the target dense vector
1523 friend inline auto smpAddAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1524 -> EnableIf_t< UseSMPAssign_v<VT1> >
1525 {
1527
1528 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1529
1530 // Evaluation of the right-hand side sparse vector operand
1531 RT x( rhs.vec_ );
     // Early exit: without non-zero elements in x there is nothing to add.
1532 if( x.nonZeros() == 0UL ) return;
1533
1534 // Evaluation of the left-hand side dense matrix operand
1535 LT A( rhs.mat_ );
1536
1537 // Checking the evaluated operands
1538 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1539 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1540 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1541 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
1542
1543 // Performing the dense matrix-sparse vector multiplication
1544 smpAddAssign( *lhs, A * x );
1545 }
1547 //**********************************************************************************************
1548
1549 //**SMP addition assignment to sparse vectors***************************************************
1550 // No special implementation for the SMP addition assignment to sparse vectors.
1551 //**********************************************************************************************
1552
1553 //**SMP subtraction assignment to dense vectors*************************************************
/*!\brief SMP subtraction assignment of a transpose dense matrix-sparse vector multiplication to
//        a dense vector (\f$ \vec{y}-=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// Both operands of \a rhs are first bound to (or evaluated into) objects of type \c RT and
// \c LT, respectively; the product of these two objects is then handed to \c smpSubAssign().
// If the vector operand holds no non-zero elements, the function returns without touching
// \a lhs and without evaluating the matrix operand.
*/
1568 template< typename VT1 > // Type of the target dense vector
1569 friend inline auto smpSubAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1570 -> EnableIf_t< UseSMPAssign_v<VT1> >
1571 {
1573
1574 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1575
1576 // Evaluation of the right-hand side sparse vector operand
1577 RT x( rhs.vec_ );
     // Early exit: without non-zero elements in x there is nothing to subtract.
1578 if( x.nonZeros() == 0UL ) return;
1579
1580 // Evaluation of the left-hand side dense matrix operand
1581 LT A( rhs.mat_ );
1582
1583 // Checking the evaluated operands
1584 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1585 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1586 BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1587 BLAZE_INTERNAL_ASSERT( A.rows() == (*lhs).size() , "Invalid vector size" );
1588
1589 // Performing the dense matrix-sparse vector multiplication
1590 smpSubAssign( *lhs, A * x );
1591 }
1593 //**********************************************************************************************
1594
1595 //**SMP subtraction assignment to sparse vectors************************************************
1596 // No special implementation for the SMP subtraction assignment to sparse vectors.
1597 //**********************************************************************************************
1598
1599 //**SMP multiplication assignment to dense vectors**********************************************
/*!\brief SMP multiplication assignment of a transpose dense matrix-sparse vector multiplication
//        to a dense vector (\f$ \vec{y}*=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression to be multiplied.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// The expression \a rhs is evaluated into a temporary of type \c ResultType, which is then
// forwarded to the \c smpMultAssign() overload for that type. The sizes of \a lhs and \a rhs are
// expected to match (internal assertion).
*/
1614 template< typename VT1 > // Type of the target dense vector
1615 friend inline auto smpMultAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1616 -> EnableIf_t< UseSMPAssign_v<VT1> >
1617 {
1619
1623
1624 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1625
     // Evaluate the expression into a temporary, then multiply the target by the temporary.
1626 const ResultType tmp( rhs );
1627 smpMultAssign( *lhs, tmp );
1628 }
1630 //**********************************************************************************************
1631
1632 //**SMP multiplication assignment to sparse vectors*********************************************
1633 // No special implementation for the SMP multiplication assignment to sparse vectors.
1634 //**********************************************************************************************
1635
1636 //**SMP division assignment to dense vectors****************************************************
/*!\brief SMP division assignment of a transpose dense matrix-sparse vector multiplication to a
//        dense vector (\f$ \vec{y}/=A*\vec{x} \f$).
//
// \param lhs The target left-hand side dense vector.
// \param rhs The right-hand side multiplication expression acting as divisor.
// \return void
//
// This overload participates in overload resolution only if \c UseSMPAssign_v<VT1> is \c true.
// The expression \a rhs is evaluated into a temporary of type \c ResultType, which is then
// forwarded to the \c smpDivAssign() overload for that type. The sizes of \a lhs and \a rhs are
// expected to match (internal assertion).
*/
1651 template< typename VT1 > // Type of the target dense vector
1652 friend inline auto smpDivAssign( DenseVector<VT1,false>& lhs, const TDMatSVecMultExpr& rhs )
1653 -> EnableIf_t< UseSMPAssign_v<VT1> >
1654 {
1656
1660
1661 BLAZE_INTERNAL_ASSERT( (*lhs).size() == rhs.size(), "Invalid vector sizes" );
1662
     // Evaluate the expression into a temporary, then divide the target by the temporary.
1663 const ResultType tmp( rhs );
1664 smpDivAssign( *lhs, tmp );
1665 }
1667 //**********************************************************************************************
1668
1669 //**SMP division assignment to sparse vectors***************************************************
1670 // No special implementation for the SMP division assignment to sparse vectors.
1671 //**********************************************************************************************
1672
1673 //**Compile time checks*************************************************************************
1682 //**********************************************************************************************
1683};
1684//*************************************************************************************************
1685
1686
1687
1688
1689//=================================================================================================
1690//
1691// GLOBAL BINARY ARITHMETIC OPERATORS
1692//
1693//=================================================================================================
1694
1695//*************************************************************************************************
/*!\brief Backend of the multiplication of a column-major dense matrix and a non-zero sparse
//        vector (\f$ \vec{y}=A*\vec{x} \f$).
//
// \param mat The left-hand side column-major dense matrix.
// \param vec The right-hand side sparse vector.
// \return An expression object of type \c TDMatSVecMultExpr<MT,VT> wrapping both operands.
//
// This overload is disabled if \c IsZero_v<VT> is \c true. It performs no computation itself; it
// only creates the expression object. Matching sizes (number of matrix columns equal to the
// vector size) are merely asserted internally and are expected to be guaranteed by the caller.
*/
1708template< typename MT // Type of the left-hand side dense matrix
1709 , typename VT // Type of the right-hand side sparse vector
1710 , DisableIf_t< IsZero_v<VT> >* = nullptr >
1711inline const TDMatSVecMultExpr<MT,VT>
1712 tdmatsvecmult( const DenseMatrix<MT,true>& mat, const SparseVector<VT,false>& vec )
1713{
1715
1716 BLAZE_INTERNAL_ASSERT( (*mat).columns() == (*vec).size(), "Invalid matrix and vector sizes" );
1717
1718 return TDMatSVecMultExpr<MT,VT>( *mat, *vec );
1719}
1721//*************************************************************************************************
1722
1723
1724//*************************************************************************************************
/*!\brief Backend of the multiplication of a column-major dense matrix and a zero vector
//        (\f$ \vec{y}=A*\vec{x} \f$).
//
// \param mat The left-hand side column-major dense matrix.
// \param vec The right-hand side zero vector.
// \return An object of type \c const MultTrait_t<ResultType_t<MT>,ResultType_t<VT>>
//         constructed with the number of rows of \a mat.
//
// This overload is enabled only if \c IsZero_v<VT> is \c true. No multiplication expression is
// created; instead the result object is constructed directly from the row count of \a mat.
// NOTE(review): presumably the returned object is a zero vector of that size - confirm against
// the MultTrait specialization for zero vectors.
*/
1737template< typename MT // Type of the left-hand side dense matrix
1738 , typename VT // Type of the right-hand side sparse vector
1739 , EnableIf_t< IsZero_v<VT> >* = nullptr >
1740inline decltype(auto)
1741 tdmatsvecmult( const DenseMatrix<MT,true>& mat, const SparseVector<VT,false>& vec )
1742{
1744
     // 'vec' is only referenced inside the internal assertion below; MAYBE_UNUSED suppresses
     // the unused parameter warning.
1745 MAYBE_UNUSED( vec );
1746
1747 BLAZE_INTERNAL_ASSERT( (*mat).columns() == (*vec).size(), "Invalid matrix and vector sizes" );
1748
1749 using ReturnType = const MultTrait_t< ResultType_t<MT>, ResultType_t<VT> >;
1750
1753
1754 return ReturnType( (*mat).rows() );
1755}
1757//*************************************************************************************************
1758
1759
1760//*************************************************************************************************
/*!\brief Multiplication operator for the multiplication of a column-major dense matrix and a
//        sparse vector (\f$ \vec{y}=A*\vec{x} \f$).
//
// \param mat The left-hand side column-major dense matrix for the multiplication.
// \param vec The right-hand side sparse vector for the multiplication.
// \return The result of \c tdmatsvecmult() for the two operands.
// \exception std::invalid_argument Matrix and vector sizes do not match.
//
// The operator verifies that the number of columns of \a mat equals the size of \a vec and
// raises the exception via \c BLAZE_THROW_INVALID_ARGUMENT otherwise. On success it forwards
// both operands to \c tdmatsvecmult(), whose selected overload determines the actual return
// type (hence \c decltype(auto)).
*/
1792template< typename MT // Type of the left-hand side dense matrix
1793 , typename VT > // Type of the right-hand side sparse vector
1794inline decltype(auto)
1795 operator*( const DenseMatrix<MT,true>& mat, const SparseVector<VT,false>& vec )
1796{
1798
1800
1801 if( (*mat).columns() != (*vec).size() ) {
1802 BLAZE_THROW_INVALID_ARGUMENT( "Matrix and vector sizes do not match" );
1803 }
1804
1805 return tdmatsvecmult( *mat, *vec );
1806}
1807//*************************************************************************************************
1808
1809
1810
1811
1812//=================================================================================================
1813//
1814// ISALIGNED SPECIALIZATIONS
1815//
1816//=================================================================================================
1817
1818//*************************************************************************************************
/*!\brief Specialization of the IsAligned type trait for TDMatSVecMultExpr.
//
// The trait for the multiplication expression derives from \c IsAligned<MT>, i.e. its result is
// taken over from the left-hand side dense matrix type \a MT; the sparse vector type \a VT does
// not influence it.
*/
1820template< typename MT, typename VT >
1821struct IsAligned< TDMatSVecMultExpr<MT,VT> >
1822 : public IsAligned<MT>
1823{};
1825//*************************************************************************************************
1826
1827} // namespace blaze
1828
1829#endif
Header file for auxiliary alias declarations.
typename T::CompositeType CompositeType_t
Alias declaration for nested CompositeType type definitions.
Definition: Aliases.h:110
typename T::ResultType ResultType_t
Alias declaration for nested ResultType type definitions.
Definition: Aliases.h:450
typename T::ElementType ElementType_t
Alias declaration for nested ElementType type definitions.
Definition: Aliases.h:190
typename T::TransposeType TransposeType_t
Alias declaration for nested TransposeType type definitions.
Definition: Aliases.h:550
Header file for run time assertion macros.
Header file for the blaze::checked and blaze::unchecked instances.
Constraints on the storage order of matrix types.
Constraint on the transpose flag of vector types.
Header file for the EnableIf class template.
Header file for the function trace functionality.
Header file for the HasSIMDAdd type trait.
Header file for the HasSIMDMult type trait.
Header file for the If class template.
Header file for the IsAligned type trait.
Header file for the IsComputation type trait class.
Header file for the IsDiagonal type trait.
Header file for the IsExpression type trait class.
Header file for the IsLower type trait.
Header file for the IsPadded type trait.
Header file for the IsResizable type trait.
Header file for the IsSIMDCombinable type trait.
Header file for the IsStrictlyLower type trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsUpper type trait.
Deactivation of problematic macros.
Header file for the MAYBE_UNUSED function template.
Header file for the multiplication trait.
Header file for the prevMultiple shim.
Header file for all SIMD functionality.
Constraint on the data type.
Base class for dense matrices.
Definition: DenseMatrix.h:82
Base class for N-dimensional dense vectors.
Definition: DenseVector.h:77
SIMD characteristics of data types.
Definition: SIMDTrait.h:297
Base class for sparse vectors.
Definition: SparseVector.h:72
Expression object for transpose dense matrix-sparse vector multiplications.
Definition: TDMatSVecMultExpr.h:111
RightOperand rightOperand() const noexcept
Returns the right-hand side sparse vector operand.
Definition: TDMatSVecMultExpr.h:321
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatSVecMultExpr.h:365
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatSVecMultExpr.h:372
TransposeType_t< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatSVecMultExpr.h:197
TDMatSVecMultExpr(const MT &mat, const VT &vec) noexcept
Constructor for the TDMatSVecMultExpr class.
Definition: TDMatSVecMultExpr.h:240
If_t< IsExpression_v< MT >, const MT, const MT & > LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatSVecMultExpr.h:204
If_t< IsExpression_v< VT >, const VT, const VT & > RightOperand
Composite type of the right-hand side sparse vector expression.
Definition: TDMatSVecMultExpr.h:207
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatSVecMultExpr.h:254
ElementType_t< ResultType > ElementType
Resulting element type.
Definition: TDMatSVecMultExpr.h:198
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatSVecMultExpr.h:345
static constexpr bool evaluateVector
Compilation switch for the composite type of the right-hand side sparse vector expression.
Definition: TDMatSVecMultExpr.h:129
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatSVecMultExpr.h:201
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatSVecMultExpr.h:333
ElementType_t< MRT > MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatSVecMultExpr.h:116
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDMatSVecMultExpr.h:288
CompositeType_t< MT > MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatSVecMultExpr.h:118
static constexpr bool evaluateMatrix
Compilation switch for the composite type of the left-hand side dense matrix expression.
Definition: TDMatSVecMultExpr.h:124
static constexpr bool simdEnabled
Compilation switch for the expression template evaluation strategy.
Definition: TDMatSVecMultExpr.h:218
ResultType_t< MT > MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatSVecMultExpr.h:114
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatSVecMultExpr.h:200
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatSVecMultExpr.h:355
static constexpr size_t SIMDSIZE
The number of elements packed within a single SIMD element.
Definition: TDMatSVecMultExpr.h:231
If_t< evaluateMatrix, const MRT, MCT > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatSVecMultExpr.h:210
ResultType_t< VT > VRT
Result type of the right-hand side sparse vector expression.
Definition: TDMatSVecMultExpr.h:115
CompositeType_t< VT > VCT
Composite type of the right-hand side sparse vector expression.
Definition: TDMatSVecMultExpr.h:119
size_t size() const noexcept
Returns the current size/dimension of the vector.
Definition: TDMatSVecMultExpr.h:301
If_t< evaluateVector, const VRT, CompositeType_t< VT > > RT
Type for the assignment of the right-hand side sparse vector operand.
Definition: TDMatSVecMultExpr.h:213
RightOperand vec_
Right-hand side sparse vector of the multiplication expression.
Definition: TDMatSVecMultExpr.h:373
SIMDTrait_t< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatSVecMultExpr.h:199
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatSVecMultExpr.h:311
static constexpr bool smpAssignable
Compilation switch for the expression template assignment strategy.
Definition: TDMatSVecMultExpr.h:225
MultTrait_t< MRT, VRT > ResultType
Result type for expression template evaluations.
Definition: TDMatSVecMultExpr.h:196
ElementType_t< VRT > VET
Element type of the right-hand side sparse vector expression.
Definition: TDMatSVecMultExpr.h:117
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Constraint on the data type.
Header file for the Computation base class.
Header file for the DenseVector base class.
Header file for the MatVecMultExpr base class.
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:812
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.
Definition: RequiresEvaluation.h:81
#define BLAZE_CONSTRAINT_MUST_NOT_BE_MATMATMULTEXPR_TYPE(T)
Constraint on the data type.
Definition: MatMatMultExpr.h:83
#define BLAZE_CONSTRAINT_MUST_NOT_BE_ZERO_TYPE(T)
Constraint on the data type.
Definition: Zero.h:81
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.
Definition: DenseMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type.
Definition: MatVecMultExpr.h:104
#define BLAZE_CONSTRAINT_MUST_BE_SPARSE_VECTOR_TYPE(T)
Constraint on the data type.
Definition: SparseVector.h:61
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.
Definition: DenseVector.h:61
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type.
Definition: ColumnVector.h:61
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.
Definition: ColumnMajorMatrix.h:61
#define BLAZE_CONSTRAINT_MUST_BE_ZERO_TYPE(T)
Constraint on the data type.
Definition: Zero.h:61
typename MultTrait< T1, T2 >::Type MultTrait_t
Auxiliary alias declaration for the MultTrait class template.
Definition: MultTrait.h:165
BLAZE_ALWAYS_INLINE constexpr auto prevMultiple(T1 value, T2 factor) noexcept
Rounds down an integral value to the previous multiple of a given factor.
Definition: PrevMultiple.h:68
constexpr void reset(Matrix< MT, SO > &matrix)
Resetting the given matrix.
Definition: Matrix.h:806
MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:584
MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:518
decltype(auto) row(Matrix< MT, SO > &, RRAs...)
Creating a view on a specific row of the given matrix.
Definition: Row.h:137
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.
Definition: Assert.h:101
BLAZE_ALWAYS_INLINE const EnableIf_t< IsIntegral_v< T > &&HasSize_v< T, 1UL >, If_t< IsSigned_v< T >, SIMDint8, SIMDuint8 > > set(T value) noexcept
Sets all values in the vector to the given 1-byte integral value.
Definition: Set.h:75
typename SIMDTrait< T >::Type SIMDTrait_t
Auxiliary alias declaration for the SIMDTrait class template.
Definition: SIMDTrait.h:315
auto smpDivAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP division assignment of a vector to a dense vector.
Definition: DenseVector.h:221
auto smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:162
auto smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:100
auto smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs) -> EnableIf_t< IsDenseVector_v< VT1 > >
Default implementation of the SMP multiplication assignment of a vector to a dense vector.
Definition: DenseVector.h:192
auto smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs) -> EnableIf_t< IsDenseMatrix_v< MT1 > >
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:131
decltype(auto) subvector(Vector< VT, TF > &, RSAs...)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:158
typename EnableIf< Condition, T >::Type EnableIf_t
Auxiliary type for the EnableIf class template.
Definition: EnableIf.h:138
constexpr void MAYBE_UNUSED(const Args &...)
Suppression of unused parameter warnings.
Definition: MaybeUnused.h:81
typename If< Condition >::template Type< T1, T2 > If_t
Auxiliary alias template for the If class template.
Definition: If.h:108
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.
Definition: Exception.h:331
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.
Definition: Exception.h:235
#define BLAZE_FUNCTION_TRACE
Function trace macro.
Definition: FunctionTrace.h:94
constexpr Unchecked unchecked
Global Unchecked instance.
Definition: Check.h:146
Header file for the exception macros of the math module.
Header file for all forward declarations for expression class templates.
Header file for the reset shim.
Header file for the serial shim.
Base class for all compute expression templates.
Definition: Computation.h:68
Base class for all matrix/vector multiplication expression templates.
Definition: MatVecMultExpr.h:69
System settings for performance optimizations.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
Header file for the IsZero type trait.
Header file for the RequiresEvaluation type trait.
Header file for basic type definitions.