35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
132 template<
typename MT1
// Compile-time helper: 'value' is nonzero when T1 is a column-major matrix
// type and at least one of T2 / T3 is a symmetric matrix type. It is used
// further down in EnableIf/DisableIf return types to pick between the
// symmetry-exploiting overloads and the regular ones.
166 template<
typename T1,
typename T2,
typename T3 >
167 struct CanExploitSymmetry {
168 enum { value = IsColumnMajorMatrix<T1>::value &&
169 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
// Compile-time helper: 'value' is nonzero when at least one of the flags
// evaluateLeft / evaluateRight is set AND CanExploitSymmetry<T1,T2,T3> does
// not hold. NOTE(review): evaluateLeft and evaluateRight are defined outside
// the visible code; their exact meaning should be confirmed there.
180 template<
typename T1,
typename T2,
typename T3 >
181 struct IsEvaluationRequired {
182 enum { value = ( evaluateLeft || evaluateRight ) &&
183 !CanExploitSymmetry<T1,T2,T3>::value };
// Compile-time predicate guarding the selectBlasAssignKernel overload that
// calls strmm/sgemm. The visible conjunction requires that
//  - T1 offers mutable data access and T2, T3 offer const data access,
//  - neither T2 nor T3 is a diagonal matrix type,
//  - T1, T2 and T3 are all vectorizable,
//  - the element types of T1, T2 and T3 all satisfy IsFloat.
// NOTE(review): the line that opens the enum ('enum { value = ...') is not
// present here, so any additional leading term cannot be confirmed.
193 template<
typename T1,
typename T2,
typename T3 >
194 struct UseSinglePrecisionKernel {
196 HasMutableDataAccess<T1>::value &&
197 HasConstDataAccess<T2>::value &&
198 HasConstDataAccess<T3>::value &&
199 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
200 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201 IsFloat<typename T1::ElementType>::value &&
202 IsFloat<typename T2::ElementType>::value &&
203 IsFloat<typename T3::ElementType>::value };
// Compile-time predicate guarding the selectBlasAssignKernel overload that
// calls dtrmm/dgemm. The visible conjunction requires that
//  - T1 offers mutable data access and T2, T3 offer const data access,
//  - neither T2 nor T3 is a diagonal matrix type,
//  - T1, T2 and T3 are all vectorizable,
//  - the element types of T1, T2 and T3 all satisfy IsDouble.
// NOTE(review): the line that opens the enum ('enum { value = ...') is not
// present here, so any additional leading term cannot be confirmed.
213 template<
typename T1,
typename T2,
typename T3 >
214 struct UseDoublePrecisionKernel {
216 HasMutableDataAccess<T1>::value &&
217 HasConstDataAccess<T2>::value &&
218 HasConstDataAccess<T3>::value &&
219 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
220 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
221 IsDouble<typename T1::ElementType>::value &&
222 IsDouble<typename T2::ElementType>::value &&
223 IsDouble<typename T3::ElementType>::value };
// Compile-time predicate guarding the selectBlasAssignKernel overload that
// calls ctrmm/cgemm. The visible conjunction requires that
//  - T1 offers mutable data access and T2, T3 offer const data access,
//  - neither T2 nor T3 is a diagonal matrix type,
//  - T1, T2 and T3 are all vectorizable,
//  - the element types of T1, T2 and T3 are all exactly complex<float>.
// NOTE(review): the line that opens the enum ('enum { value = ...') is not
// present here, so any additional leading term cannot be confirmed.
234 template<
typename T1,
typename T2,
typename T3 >
235 struct UseSinglePrecisionComplexKernel {
// Element type all three matrix types are compared against.
236 typedef complex<float> Type;
238 HasMutableDataAccess<T1>::value &&
239 HasConstDataAccess<T2>::value &&
240 HasConstDataAccess<T3>::value &&
241 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
242 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
243 IsSame<typename T1::ElementType,Type>::value &&
244 IsSame<typename T2::ElementType,Type>::value &&
245 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate guarding the selectBlasAssignKernel overload that
// calls ztrmm/zgemm. The visible conjunction requires that
//  - T1 offers mutable data access and T2, T3 offer const data access,
//  - neither T2 nor T3 is a diagonal matrix type,
//  - T1, T2 and T3 are all vectorizable,
//  - the element types of T1, T2 and T3 are all exactly complex<double>.
// NOTE(review): the line that opens the enum ('enum { value = ...') is not
// present here, so any additional leading term cannot be confirmed.
256 template<
typename T1,
typename T2,
typename T3 >
257 struct UseDoublePrecisionComplexKernel {
// Element type all three matrix types are compared against.
258 typedef complex<double> Type;
260 HasMutableDataAccess<T1>::value &&
261 HasConstDataAccess<T2>::value &&
262 HasConstDataAccess<T3>::value &&
263 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
264 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
265 IsSame<typename T1::ElementType,Type>::value &&
266 IsSame<typename T2::ElementType,Type>::value &&
267 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate guarding the fallback selectBlasAssignKernel
// overload (the one that forwards to selectLargeAssignKernel). 'value' is
// nonzero when BLAZE_BLAS_MODE evaluates to false, or when none of the four
// BLAS predicates (single, double, single complex, double complex) holds for
// the type triple T1,T2,T3.
277 template<
typename T1,
typename T2,
typename T3 >
278 struct UseDefaultKernel {
279 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
280 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
281 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
282 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate used (via EnableIf/DisableIf) to choose between the
// intrinsic-based small/large kernels and the scalar default kernels.
// 'value' is nonzero when
//  - T3 is not a diagonal matrix type,
//  - T1, T2 and T3 are all vectorizable,
//  - T2 and T3 have the same element type as T1,
//  - IntrinsicTrait of that element type reports addition, subtraction and
//    multiplication as available.
292 template<
typename T1,
typename T2,
typename T3 >
293 struct UseVectorizedDefaultKernel {
294 enum { value = !IsDiagonal<T3>::value &&
295 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
296 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
297 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
298 IntrinsicTrait<typename T1::ElementType>::addition &&
299 IntrinsicTrait<typename T1::ElementType>::subtraction &&
300 IntrinsicTrait<typename T1::ElementType>::multiplication };
332 MT1::vectorizable && MT2::vectorizable &&
338 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
339 !evaluateRight && MT2::smpAssignable };
382 :(
lhs_.columns() ) ) );
384 if(
lhs_.columns() == 0UL ||
394 const size_t knum( kend - kbegin );
395 const size_t kpos( kbegin + ( ( knum - 1UL ) &
size_t(-2) ) + 1UL );
397 ElementType tmp(
lhs_(i,kbegin) *
rhs_(kbegin,j) );
399 for(
size_t k=kbegin+1UL; k<kpos; k+=2UL ) {
401 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
427 return rhs_.columns();
457 template<
typename T >
459 return (
lhs_.canAlias( alias ) ||
rhs_.canAlias( alias ) );
469 template<
typename T >
471 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
481 return lhs_.isAligned() &&
rhs_.isAligned();
516 template<
typename MT
526 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
529 else if( rhs.
lhs_.columns() == 0UL ) {
544 DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Kernel dispatch for the assignment of the product of A and B to C: the
// call is routed either to selectSmallAssignKernel or to
// selectBlasAssignKernel, each with the unchanged arguments (C, A, B).
// NOTE(review): the condition that chooses between the two calls is not
// present in the visible code; confirm the selection rule in the full file.
560 template<
typename MT3
563 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
567 selectSmallAssignKernel( C, A, B );
569 selectBlasAssignKernel( C, A, B );
588 template<
typename MT3
591 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
592 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
594 const size_t M( A.rows() );
595 const size_t N( B.columns() );
596 const size_t K( A.columns() );
598 for(
size_t i=0UL; i<M; ++i )
600 const size_t kbegin( ( IsUpper<MT4>::value )
601 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
603 const size_t kend( ( IsLower<MT4>::value )
604 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
608 if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
609 for(
size_t j=0UL; j<N; ++j ) {
616 const size_t jbegin( ( IsUpper<MT5>::value )
617 ?( IsStrictlyUpper<MT5>::value ? kbegin+1UL : kbegin )
619 const size_t jend( ( IsLower<MT5>::value )
620 ?( IsStrictlyLower<MT5>::value ? kbegin : kbegin+1UL )
624 if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
625 for(
size_t j=0UL; j<jbegin; ++j ) {
629 else if( IsStrictlyUpper<MT5>::value ) {
632 for(
size_t j=jbegin; j<jend; ++j ) {
633 C(i,j) = A(i,kbegin) * B(kbegin,j);
635 if( IsLower<MT4>::value && IsLower<MT5>::value ) {
636 for(
size_t j=jend; j<N; ++j ) {
640 else if( IsStrictlyLower<MT5>::value ) {
645 for(
size_t k=kbegin+1UL; k<kend; ++k )
647 const size_t jbegin( ( IsUpper<MT5>::value )
648 ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
650 const size_t jend( ( IsLower<MT5>::value )
651 ?( IsStrictlyLower<MT5>::value ? k-1UL : k )
655 for(
size_t j=jbegin; j<jend; ++j ) {
656 C(i,j) += A(i,k) * B(k,j);
658 if( IsLower<MT5>::value ) {
659 C(i,jend) = A(i,k) * B(k,jend);
// Default (scalar) assignment kernel, enabled when MT4 is not a diagonal
// matrix type and MT5 is one. For every row i the columns j in
// [jbegin,jend) are assigned C(i,j) = A(i,j) * B(j,j).
// NOTE(review): the 'else' alternatives of the jbegin/jend initializers and
// the bodies of the loops over [0,jbegin) and [jend,N) are not present in
// the visible code; presumably those loops reset C(i,j) -- confirm.
681 template<
typename MT3
684 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
685 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
689 const size_t M( A.rows() );
690 const size_t N( B.columns() );
692 for(
size_t i=0UL; i<M; ++i )
// First column of row i to compute: for an upper A this is the diagonal
// position i, or i+1 when A is strictly upper.
694 const size_t jbegin( ( IsUpper<MT4>::value )
695 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
// One past the last column of row i to compute: for a lower A this is i+1,
// or i when A is strictly lower.
697 const size_t jend( ( IsLower<MT4>::value )
698 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// Columns left of the computed range (only relevant for an upper A).
702 if( IsUpper<MT4>::value ) {
703 for(
size_t j=0UL; j<jbegin; ++j ) {
// Computed range: B contributes only its diagonal element B(j,j).
707 for(
size_t j=jbegin; j<jend; ++j ) {
708 C(i,j) = A(i,j) * B(j,j);
// Columns right of the computed range (only relevant for a lower A).
710 if( IsLower<MT4>::value ) {
711 for(
size_t j=jend; j<N; ++j ) {
// Default (scalar) assignment kernel, enabled when MT4 is a diagonal matrix
// type and MT5 is not. For every row i the columns j in [jbegin,jend) are
// assigned C(i,j) = A(i,i) * B(i,j).
// NOTE(review): the 'else' alternatives of the jbegin/jend initializers and
// the bodies of the loops over [0,jbegin) and [jend,N) are not present in
// the visible code; presumably those loops reset C(i,j) -- confirm.
734 template<
typename MT3
737 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
738 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
742 const size_t M( A.rows() );
743 const size_t N( B.columns() );
745 for(
size_t i=0UL; i<M; ++i )
// First column of row i to compute: for an upper B this is the diagonal
// position i, or i+1 when B is strictly upper.
747 const size_t jbegin( ( IsUpper<MT5>::value )
748 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
// One past the last column of row i to compute: for a lower B this is i+1,
// or i when B is strictly lower.
750 const size_t jend( ( IsLower<MT5>::value )
751 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
// Columns left of the computed range (only relevant for an upper B).
755 if( IsUpper<MT5>::value ) {
756 for(
size_t j=0UL; j<jbegin; ++j ) {
// Computed range: A contributes only its diagonal element A(i,i).
760 for(
size_t j=jbegin; j<jend; ++j ) {
761 C(i,j) = A(i,i) * B(i,j);
// Columns right of the computed range (only relevant for a lower B).
763 if( IsLower<MT5>::value ) {
764 for(
size_t j=jend; j<N; ++j ) {
// Default (scalar) assignment kernel, enabled when both MT4 and MT5 are
// diagonal matrix types. Each diagonal element of the target is assigned the
// product of the matching diagonal elements: C(i,i) = A(i,i) * B(i,i).
// NOTE(review): statements preceding the loop are not present in the visible
// code; how off-diagonal elements of C are treated cannot be confirmed here.
787 template<
typename MT3
790 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
791 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
797 for(
size_t i=0UL; i<A.rows(); ++i ) {
798 C(i,i) = A(i,i) * B(i,i);
// selectSmallAssignKernel overload for type triples that do NOT satisfy
// UseVectorizedDefaultKernel: it forwards unchanged to
// selectDefaultAssignKernel.
817 template<
typename MT3
820 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
821 selectSmallAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
823 selectDefaultAssignKernel( C, A, B );
843 template<
typename MT3
846 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
847 selectSmallAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
849 typedef IntrinsicTrait<ElementType> IT;
851 const size_t M( A.rows() );
852 const size_t N( B.columns() );
853 const size_t K( A.columns() );
858 for(
size_t i=0UL; i<M; ++i )
860 const size_t kbegin( ( IsUpper<MT4>::value )
861 ?( ( IsLower<MT5>::value )
862 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
863 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
864 :( IsLower<MT5>::value ? j : 0UL ) );
865 const size_t kend( ( IsLower<MT4>::value )
866 ?( ( IsUpper<MT5>::value )
867 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
868 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
869 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
871 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
873 for(
size_t k=kbegin; k<kend; ++k ) {
874 const IntrinsicType a1(
set( A(i,k) ) );
875 xmm1 = xmm1 + a1 * B.load(k,j );
876 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
877 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
878 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
879 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
880 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
881 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
882 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
885 (~C).
store( i, j , xmm1 );
900 for( ; (i+2UL) <= M; i+=2UL )
902 const size_t kbegin( ( IsUpper<MT4>::value )
903 ?( ( IsLower<MT5>::value )
904 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
905 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
906 :( IsLower<MT5>::value ? j : 0UL ) );
907 const size_t kend( ( IsLower<MT4>::value )
908 ?( ( IsUpper<MT5>::value )
909 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
910 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
911 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
913 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
915 for(
size_t k=kbegin; k<kend; ++k ) {
916 const IntrinsicType a1(
set( A(i ,k) ) );
917 const IntrinsicType a2(
set( A(i+1UL,k) ) );
918 const IntrinsicType b1( B.load(k,j ) );
919 const IntrinsicType b2( B.load(k,j+
IT::size ) );
920 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
921 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
922 xmm1 = xmm1 + a1 * b1;
923 xmm2 = xmm2 + a1 * b2;
924 xmm3 = xmm3 + a1 * b3;
925 xmm4 = xmm4 + a1 * b4;
926 xmm5 = xmm5 + a2 * b1;
927 xmm6 = xmm6 + a2 * b2;
928 xmm7 = xmm7 + a2 * b3;
929 xmm8 = xmm8 + a2 * b4;
932 (~C).
store( i , j , xmm1 );
936 (~C).
store( i+1UL, j , xmm5 );
944 const size_t kbegin( ( IsUpper<MT4>::value )
945 ?( ( IsLower<MT5>::value )
946 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
947 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
948 :( IsLower<MT5>::value ? j : 0UL ) );
949 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
951 IntrinsicType xmm1, xmm2, xmm3, xmm4;
953 for(
size_t k=kbegin; k<kend; ++k ) {
954 const IntrinsicType a1(
set( A(i,k) ) );
955 xmm1 = xmm1 + a1 * B.load(k,j );
956 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
957 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
958 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
961 (~C).
store( i, j , xmm1 );
972 for( ; (i+2UL) <= M; i+=2UL )
974 const size_t kbegin( ( IsUpper<MT4>::value )
975 ?( ( IsLower<MT5>::value )
976 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
977 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
978 :( IsLower<MT5>::value ? j : 0UL ) );
979 const size_t kend( ( IsLower<MT4>::value )
980 ?( ( IsUpper<MT5>::value )
981 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
982 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
983 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
985 IntrinsicType xmm1, xmm2, xmm3, xmm4;
987 for(
size_t k=kbegin; k<kend; ++k ) {
988 const IntrinsicType a1(
set( A(i ,k) ) );
989 const IntrinsicType a2(
set( A(i+1UL,k) ) );
990 const IntrinsicType b1( B.load(k,j ) );
991 const IntrinsicType b2( B.load(k,j+
IT::size) );
992 xmm1 = xmm1 + a1 * b1;
993 xmm2 = xmm2 + a1 * b2;
994 xmm3 = xmm3 + a2 * b1;
995 xmm4 = xmm4 + a2 * b2;
998 (~C).
store( i , j , xmm1 );
1000 (~C).
store( i+1UL, j , xmm3 );
1006 const size_t kbegin( ( IsUpper<MT4>::value )
1007 ?( ( IsLower<MT5>::value )
1008 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1009 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1010 :( IsLower<MT5>::value ? j : 0UL ) );
1011 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
1013 IntrinsicType xmm1, xmm2;
1015 for(
size_t k=kbegin; k<kend; ++k ) {
1016 const IntrinsicType a1(
set( A(i,k) ) );
1017 xmm1 = xmm1 + a1 * B.load(k,j );
1018 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
1021 (~C).
store( i, j , xmm1 );
1030 for( ; (i+2UL) <= M; i+=2UL )
1032 const size_t kbegin( ( IsUpper<MT4>::value )
1033 ?( ( IsLower<MT5>::value )
1034 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1035 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1036 :( IsLower<MT5>::value ? j : 0UL ) );
1037 const size_t kend( ( IsLower<MT4>::value )
1038 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
1041 IntrinsicType xmm1, xmm2;
1043 for(
size_t k=kbegin; k<kend; ++k ) {
1044 const IntrinsicType b1( B.load(k,j) );
1045 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
1046 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
1049 (~C).
store( i , j, xmm1 );
1050 (~C).
store( i+1UL, j, xmm2 );
1055 const size_t kbegin( ( IsUpper<MT4>::value )
1056 ?( ( IsLower<MT5>::value )
1057 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1058 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1059 :( IsLower<MT5>::value ? j : 0UL ) );
1063 for(
size_t k=kbegin; k<K; ++k ) {
1064 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
1067 (~C).
store( i, j, xmm1 );
1089 template<
typename MT3
1092 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1093 selectSmallAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1100 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1104 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1108 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// selectLargeAssignKernel overload for type triples that do NOT satisfy
// UseVectorizedDefaultKernel: it forwards unchanged to
// selectDefaultAssignKernel.
1133 template<
typename MT3
1136 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1137 selectLargeAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1139 selectDefaultAssignKernel( C, A, B );
1159 template<
typename MT3
1162 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1163 selectLargeAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1165 typedef IntrinsicTrait<ElementType> IT;
1167 const size_t M( A.rows() );
1168 const size_t N( B.columns() );
1169 const size_t K( A.columns() );
1171 const size_t iblock( 64UL );
1172 const size_t jblock( 128UL );
1173 const size_t kblock( 128UL );
1175 for(
size_t jj=0UL; jj<N; jj+=jblock )
1177 const size_t jend(
min( jj+jblock, N ) );
1179 for(
size_t ii=0UL; ii<M; ii+=iblock )
1181 const size_t iend(
min( ii+iblock, M ) );
1183 for(
size_t i=ii; i<iend; ++i ) {
1184 for(
size_t j=jj; j<jend; ++j ) {
1189 for(
size_t kk=0UL; kk<K; kk+=kblock )
1191 const size_t ktmp(
min( kk+kblock, K ) );
1203 for( ; (i+2UL) <= iend; i+=2UL )
1205 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1206 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1207 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
1208 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
1210 IntrinsicType xmm1( (~C).
load(i ,j ) );
1211 IntrinsicType xmm2( (~C).
load(i ,j1) );
1212 IntrinsicType xmm3( (~C).
load(i ,j2) );
1213 IntrinsicType xmm4( (~C).
load(i ,j3) );
1214 IntrinsicType xmm5( (~C).
load(i+1UL,j ) );
1215 IntrinsicType xmm6( (~C).
load(i+1UL,j1) );
1216 IntrinsicType xmm7( (~C).
load(i+1UL,j2) );
1217 IntrinsicType xmm8( (~C).
load(i+1UL,j3) );
1219 for(
size_t k=kbegin; k<kend; ++k ) {
1220 const IntrinsicType a1(
set( A(i ,k) ) );
1221 const IntrinsicType a2(
set( A(i+1UL,k) ) );
1222 const IntrinsicType b1( B.load(k,j ) );
1223 const IntrinsicType b2( B.load(k,j1) );
1224 const IntrinsicType b3( B.load(k,j2) );
1225 const IntrinsicType b4( B.load(k,j3) );
1226 xmm1 = xmm1 + a1 * b1;
1227 xmm2 = xmm2 + a1 * b2;
1228 xmm3 = xmm3 + a1 * b3;
1229 xmm4 = xmm4 + a1 * b4;
1230 xmm5 = xmm5 + a2 * b1;
1231 xmm6 = xmm6 + a2 * b2;
1232 xmm7 = xmm7 + a2 * b3;
1233 xmm8 = xmm8 + a2 * b4;
1236 (~C).
store( i , j , xmm1 );
1237 (~C).
store( i , j1, xmm2 );
1238 (~C).
store( i , j2, xmm3 );
1239 (~C).
store( i , j3, xmm4 );
1240 (~C).
store( i+1UL, j , xmm5 );
1241 (~C).
store( i+1UL, j1, xmm6 );
1242 (~C).
store( i+1UL, j2, xmm7 );
1243 (~C).
store( i+1UL, j3, xmm8 );
1248 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1249 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1250 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1251 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
1253 IntrinsicType xmm1( (~C).
load(i,j ) );
1254 IntrinsicType xmm2( (~C).
load(i,j1) );
1255 IntrinsicType xmm3( (~C).
load(i,j2) );
1256 IntrinsicType xmm4( (~C).
load(i,j3) );
1258 for(
size_t k=kbegin; k<kend; ++k ) {
1259 const IntrinsicType a1(
set( A(i,k) ) );
1260 xmm1 = xmm1 + a1 * B.load(k,j );
1261 xmm2 = xmm2 + a1 * B.load(k,j1);
1262 xmm3 = xmm3 + a1 * B.load(k,j2);
1263 xmm4 = xmm4 + a1 * B.load(k,j3);
1266 (~C).
store( i, j , xmm1 );
1267 (~C).
store( i, j1, xmm2 );
1268 (~C).
store( i, j2, xmm3 );
1269 (~C).
store( i, j3, xmm4 );
1279 for( ; (i+4UL) <= iend; i+=4UL )
1281 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1282 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1283 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
1284 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
1286 IntrinsicType xmm1( (~C).
load(i ,j ) );
1287 IntrinsicType xmm2( (~C).
load(i ,j1) );
1288 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
1289 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
1290 IntrinsicType xmm5( (~C).
load(i+2UL,j ) );
1291 IntrinsicType xmm6( (~C).
load(i+2UL,j1) );
1292 IntrinsicType xmm7( (~C).
load(i+3UL,j ) );
1293 IntrinsicType xmm8( (~C).
load(i+3UL,j1) );
1295 for(
size_t k=kbegin; k<kend; ++k ) {
1296 const IntrinsicType a1(
set( A(i ,k) ) );
1297 const IntrinsicType a2(
set( A(i+1UL,k) ) );
1298 const IntrinsicType a3(
set( A(i+2UL,k) ) );
1299 const IntrinsicType a4(
set( A(i+3UL,k) ) );
1300 const IntrinsicType b1( B.load(k,j ) );
1301 const IntrinsicType b2( B.load(k,j1) );
1302 xmm1 = xmm1 + a1 * b1;
1303 xmm2 = xmm2 + a1 * b2;
1304 xmm3 = xmm3 + a2 * b1;
1305 xmm4 = xmm4 + a2 * b2;
1306 xmm5 = xmm5 + a3 * b1;
1307 xmm6 = xmm6 + a3 * b2;
1308 xmm7 = xmm7 + a4 * b1;
1309 xmm8 = xmm8 + a4 * b2;
1312 (~C).
store( i , j , xmm1 );
1313 (~C).
store( i , j1, xmm2 );
1314 (~C).
store( i+1UL, j , xmm3 );
1315 (~C).
store( i+1UL, j1, xmm4 );
1316 (~C).
store( i+2UL, j , xmm5 );
1317 (~C).
store( i+2UL, j1, xmm6 );
1318 (~C).
store( i+3UL, j , xmm7 );
1319 (~C).
store( i+3UL, j1, xmm8 );
1322 for( ; (i+2UL) <= iend; i+=2UL )
1324 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1325 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1326 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
1327 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
1329 IntrinsicType xmm1( (~C).
load(i ,j ) );
1330 IntrinsicType xmm2( (~C).
load(i ,j1) );
1331 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
1332 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
1334 for(
size_t k=kbegin; k<kend; ++k ) {
1335 const IntrinsicType a1(
set( A(i ,k) ) );
1336 const IntrinsicType a2(
set( A(i+1UL,k) ) );
1337 const IntrinsicType b1( B.load(k,j ) );
1338 const IntrinsicType b2( B.load(k,j1) );
1339 xmm1 = xmm1 + a1 * b1;
1340 xmm2 = xmm2 + a1 * b2;
1341 xmm3 = xmm3 + a2 * b1;
1342 xmm4 = xmm4 + a2 * b2;
1345 (~C).
store( i , j , xmm1 );
1346 (~C).
store( i , j1, xmm2 );
1347 (~C).
store( i+1UL, j , xmm3 );
1348 (~C).
store( i+1UL, j1, xmm4 );
1353 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1354 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1355 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1356 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
1358 IntrinsicType xmm1( (~C).
load(i,j ) );
1359 IntrinsicType xmm2( (~C).
load(i,j1) );
1361 for(
size_t k=kbegin; k<kend; ++k ) {
1362 const IntrinsicType a1(
set( A(i,k) ) );
1363 xmm1 = xmm1 + a1 * B.load(k,j );
1364 xmm2 = xmm2 + a1 * B.load(k,j1);
1367 (~C).
store( i, j , xmm1 );
1368 (~C).
store( i, j1, xmm2 );
1374 for(
size_t i=ii; i<iend; ++i )
1376 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
1377 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
1378 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1379 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
1381 IntrinsicType xmm1( (~C).
load(i,j) );
1383 for(
size_t k=kbegin; k<kend; ++k ) {
1384 const IntrinsicType a1(
set( A(i,k) ) );
1385 xmm1 = xmm1 + a1 * B.load(k,j);
1388 (~C).
store( i, j, xmm1 );
// selectLargeAssignKernel overload for a target passed as
// DenseMatrix<MT3,true> when UseVectorizedDefaultKernel holds. There is no
// dedicated large kernel for this case: the call is forwarded to
// selectSmallAssignKernel with the unwrapped target (~C).
// NOTE(review): the 'true' flag presumably denotes column-major storage --
// confirm against the DenseMatrix declaration.
1412 template<
typename MT3
1415 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1416 selectLargeAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1418 selectSmallAssignKernel( ~C, A, B );
// selectBlasAssignKernel overload enabled when UseDefaultKernel holds, i.e.
// when no BLAS routine is used for this type triple: it forwards unchanged
// to selectLargeAssignKernel.
1436 template<
typename MT3
1439 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1440 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1442 selectLargeAssignKernel( C, A, B );
// selectBlasAssignKernel overload enabled when UseSinglePrecisionKernel
// holds. It picks one of three routines based on compile-time matrix traits.
// NOTE(review): the statements between the visible lines (e.g. whatever
// prepares C before each strmm call) are not present here.
1462 template<
typename MT3
1465 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1466 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A is triangular: strmm with A on the left; the uplo flag follows
// IsLower<MT4>.
1468 if( IsTriangular<MT4>::value ) {
1470 strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// Otherwise, B is triangular: strmm with B on the right; the uplo flag
// follows IsLower<MT5>.
1472 else if( IsTriangular<MT5>::value ) {
1474 strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// Neither operand is triangular: general multiplication with the scalars
// 1 and 0 (presumably alpha and beta -- confirm against the sgemm wrapper).
1477 sgemm( C, A, B, 1.0F, 0.0F );
// selectBlasAssignKernel overload enabled when UseDoublePrecisionKernel
// holds. It picks one of three routines based on compile-time matrix traits.
// NOTE(review): the statements between the visible lines (e.g. whatever
// prepares C before each dtrmm call) are not present here.
1499 template<
typename MT3
1502 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1503 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A is triangular: dtrmm with A on the left; the uplo flag follows
// IsLower<MT4>.
1505 if( IsTriangular<MT4>::value ) {
1507 dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// Otherwise, B is triangular: dtrmm with B on the right; the uplo flag
// follows IsLower<MT5>.
1509 else if( IsTriangular<MT5>::value ) {
1511 dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// Neither operand is triangular: general multiplication with the scalars
// 1 and 0 (presumably alpha and beta -- confirm against the dgemm wrapper).
1514 dgemm( C, A, B, 1.0, 0.0 );
// selectBlasAssignKernel overload enabled when
// UseSinglePrecisionComplexKernel holds. It picks one of three routines
// based on compile-time matrix traits.
// NOTE(review): the statements between the visible lines (e.g. whatever
// prepares C before each ctrmm call) are not present here.
1536 template<
typename MT3
1539 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1540 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A is triangular: ctrmm with A on the left; the uplo flag follows
// IsLower<MT4>; the scalar is the complex value (1,0).
1542 if( IsTriangular<MT4>::value ) {
1544 ctrmm( C, A, CblasLeft,
1545 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1546 complex<float>( 1.0F, 0.0F ) );
// Otherwise, B is triangular: ctrmm with B on the right; the uplo flag
// follows IsLower<MT5>.
1548 else if( IsTriangular<MT5>::value ) {
1550 ctrmm( C, B, CblasRight,
1551 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1552 complex<float>( 1.0F, 0.0F ) );
// Neither operand is triangular: general multiplication with the complex
// scalars (1,0) and (0,0) (presumably alpha and beta -- confirm against the
// cgemm wrapper).
1555 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// selectBlasAssignKernel overload enabled when
// UseDoublePrecisionComplexKernel holds. It picks one of three routines
// based on compile-time matrix traits.
// NOTE(review): the statements between the visible lines (e.g. whatever
// prepares C before each ztrmm call) are not present here.
1577 template<
typename MT3
1580 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1581 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A is triangular: ztrmm with A on the left; the uplo flag follows
// IsLower<MT4>; the scalar is the complex value (1,0).
1583 if( IsTriangular<MT4>::value ) {
1585 ztrmm( C, A, CblasLeft,
1586 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1587 complex<double>( 1.0, 0.0 ) );
// Otherwise, B is triangular: ztrmm with B on the right; the uplo flag
// follows IsLower<MT5>.
1589 else if( IsTriangular<MT5>::value ) {
1591 ztrmm( C, B, CblasRight,
1592 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1593 complex<double>( 1.0, 0.0 ) );
// Neither operand is triangular: general multiplication with the complex
// scalars (1,0) and (0,0) (presumably alpha and beta -- confirm against the
// zgemm wrapper).
1596 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1616 template<
typename MT
1618 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1623 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1635 const TmpType tmp(
serial( rhs ) );
1656 template<
typename MT >
1657 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1667 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1669 else if( IsSymmetric<MT1>::value )
1690 template<
typename MT
1692 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1700 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1704 LT A(
serial( rhs.lhs_ ) );
1705 RT B(
serial( rhs.rhs_ ) );
1714 DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Kernel dispatch for the addition assignment of the product of A and B to
// C: the call is routed either to selectSmallAddAssignKernel or to
// selectBlasAddAssignKernel, each with the unchanged arguments (C, A, B).
// A diagonal MT5 is one of the conditions selecting the small kernel.
// NOTE(review): the remainder of the condition is not present in the visible
// code; confirm the full selection rule in the complete file.
1730 template<
typename MT3
1733 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1735 if( ( IsDiagonal<MT5>::value ) ||
1737 selectSmallAddAssignKernel( C, A, B );
1739 selectBlasAddAssignKernel( C, A, B );
// Default (scalar) addition assignment kernel, enabled when neither MT4 nor
// MT5 is a diagonal matrix type. For every row i and every k in
// [kbegin,kend) it adds A(i,k) * B(k,j) to C(i,j) for j in [jbegin,jend),
// two columns per loop iteration.
// NOTE(review): the 'else' alternatives of the kbegin/kend/jbegin/jend
// initializers and the guard in front of the trailing single-column update
// are not present in the visible code.
1758 template<
typename MT3
1761 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
1762 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1764 const size_t M( A.rows() );
1765 const size_t N( B.columns() );
1766 const size_t K( A.columns() );
1768 for(
size_t i=0UL; i<M; ++i )
// Range of k for row i of A: an upper A starts at i (i+1 if strictly upper),
// a lower A ends at i+1 (i if strictly lower).
1770 const size_t kbegin( ( IsUpper<MT4>::value )
1771 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1773 const size_t kend( ( IsLower<MT4>::value )
1774 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1778 for(
size_t k=kbegin; k<kend; ++k )
// Range of j for row k of B, derived the same way from B's structure.
1780 const size_t jbegin( ( IsUpper<MT5>::value )
1781 ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
1783 const size_t jend( ( IsLower<MT5>::value )
1784 ?( IsStrictlyLower<MT5>::value ? k : k+1UL )
// jpos = jbegin + (jnum rounded down to an even count): end of the part of
// the range that can be processed in pairs.
1788 const size_t jnum( jend - jbegin );
1789 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Pairwise part: two columns per iteration.
1791 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1792 C(i,j ) += A(i,k) * B(k,j );
1793 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update of the single column at jpos (the remaining one when jnum is odd).
1796 C(i,jpos) += A(i,k) * B(k,jpos);
// Default (scalar) addition assignment kernel, enabled when MT4 is not a
// diagonal matrix type and MT5 is one. For every row i it adds
// A(i,j) * B(j,j) to C(i,j) for j in [jbegin,jend), two columns per loop
// iteration.
// NOTE(review): the 'else' alternatives of the jbegin/jend initializers and
// the guard in front of the trailing single-column update are not present in
// the visible code.
1818 template<
typename MT3
1821 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
1822 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1826 const size_t M( A.rows() );
1827 const size_t N( B.columns() );
1829 for(
size_t i=0UL; i<M; ++i )
// Column range for row i derived from A's structure: an upper A starts at i
// (i+1 if strictly upper), a lower A ends at i+1 (i if strictly lower).
1831 const size_t jbegin( ( IsUpper<MT4>::value )
1832 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1834 const size_t jend( ( IsLower<MT4>::value )
1835 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// jpos = jbegin + (jnum rounded down to an even count): end of the part of
// the range that can be processed in pairs.
1839 const size_t jnum( jend - jbegin );
1840 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Pairwise part: B contributes only its diagonal elements.
1842 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1843 C(i,j ) += A(i,j ) * B(j ,j );
1844 C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
// Update of the single column at jpos (the remaining one when jnum is odd).
1847 C(i,jpos) += A(i,jpos) * B(jpos,jpos);
// Default (scalar) addition assignment kernel, enabled when MT4 is a
// diagonal matrix type and MT5 is not. For every row i it adds
// A(i,i) * B(i,j) to C(i,j) for j in [jbegin,jend), two columns per loop
// iteration.
// NOTE(review): the 'else' alternatives of the jbegin/jend initializers and
// the guard in front of the trailing single-column update are not present in
// the visible code.
1868 template<
typename MT3
1871 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
1872 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1876 const size_t M( A.rows() );
1877 const size_t N( B.columns() );
1879 for(
size_t i=0UL; i<M; ++i )
// Column range for row i derived from B's structure: an upper B starts at i
// (i+1 if strictly upper), a lower B ends at i+1 (i if strictly lower).
1881 const size_t jbegin( ( IsUpper<MT5>::value )
1882 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
1884 const size_t jend( ( IsLower<MT5>::value )
1885 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
// jpos = jbegin + (jnum rounded down to an even count): end of the part of
// the range that can be processed in pairs.
1889 const size_t jnum( jend - jbegin );
1890 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Pairwise part: A contributes only its diagonal element A(i,i).
1892 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
1893 C(i,j ) += A(i,i) * B(i,j );
1894 C(i,j+1UL) += A(i,i) * B(i,j+1UL);
// Update of the single column at jpos (the remaining one when jnum is odd).
1897 C(i,jpos) += A(i,i) * B(i,jpos);
// Default (scalar) addition assignment kernel, enabled when both MT4 and MT5
// are diagonal matrix types. Only the diagonal of the target is touched:
// C(i,i) += A(i,i) * B(i,i) for every row index i of A.
1918 template<
typename MT3
1921 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
1922 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1926 for(
size_t i=0UL; i<A.rows(); ++i ) {
1927 C(i,i) += A(i,i) * B(i,i);
// selectSmallAddAssignKernel overload for type triples that do NOT satisfy
// UseVectorizedDefaultKernel: it forwards unchanged to
// selectDefaultAddAssignKernel.
1947 template<
typename MT3
1950 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1951 selectSmallAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1953 selectDefaultAddAssignKernel( C, A, B );
1973 template<
typename MT3
1976 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1977 selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1979 typedef IntrinsicTrait<ElementType> IT;
1981 const size_t M( A.rows() );
1982 const size_t N( B.columns() );
1983 const size_t K( A.columns() );
1988 for(
size_t i=0UL; i<M; ++i )
1990 const size_t kbegin( ( IsUpper<MT4>::value )
1991 ?( ( IsLower<MT5>::value )
1992 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1993 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1994 :( IsLower<MT5>::value ? j : 0UL ) );
1995 const size_t kend( ( IsLower<MT4>::value )
1996 ?( ( IsUpper<MT5>::value )
1997 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
1998 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1999 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
2001 IntrinsicType xmm1( (~C).
load(i,j ) );
2010 for(
size_t k=kbegin; k<kend; ++k ) {
2011 const IntrinsicType a1(
set( A(i,k) ) );
2012 xmm1 = xmm1 + a1 * B.load(k,j );
2013 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
2014 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
2015 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
2016 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
2017 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
2018 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
2019 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
2022 (~C).
store( i, j , xmm1 );
2037 for( ; (i+2UL) <= M; i+=2UL )
2039 const size_t kbegin( ( IsUpper<MT4>::value )
2040 ?( ( IsLower<MT5>::value )
2041 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2042 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2043 :( IsLower<MT5>::value ? j : 0UL ) );
2044 const size_t kend( ( IsLower<MT4>::value )
2045 ?( ( IsUpper<MT5>::value )
2046 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
2047 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2048 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
2050 IntrinsicType xmm1( (~C).
load(i ,j ) );
2054 IntrinsicType xmm5( (~C).
load(i+1UL,j ) );
2059 for(
size_t k=kbegin; k<kend; ++k ) {
2060 const IntrinsicType a1(
set( A(i ,k) ) );
2061 const IntrinsicType a2(
set( A(i+1UL,k) ) );
2062 const IntrinsicType b1( B.load(k,j ) );
2063 const IntrinsicType b2( B.load(k,j+
IT::size ) );
2064 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
2065 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
2066 xmm1 = xmm1 + a1 * b1;
2067 xmm2 = xmm2 + a1 * b2;
2068 xmm3 = xmm3 + a1 * b3;
2069 xmm4 = xmm4 + a1 * b4;
2070 xmm5 = xmm5 + a2 * b1;
2071 xmm6 = xmm6 + a2 * b2;
2072 xmm7 = xmm7 + a2 * b3;
2073 xmm8 = xmm8 + a2 * b4;
2076 (~C).
store( i , j , xmm1 );
2080 (~C).
store( i+1UL, j , xmm5 );
2088 const size_t kbegin( ( IsUpper<MT4>::value )
2089 ?( ( IsLower<MT5>::value )
2090 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2091 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2092 :( IsLower<MT5>::value ? j : 0UL ) );
2093 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
2095 IntrinsicType xmm1( (~C).
load(i,j ) );
2100 for(
size_t k=kbegin; k<kend; ++k ) {
2101 const IntrinsicType a1(
set( A(i,k) ) );
2102 xmm1 = xmm1 + a1 * B.load(k,j );
2103 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
2104 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
2105 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
2108 (~C).
store( i, j , xmm1 );
2119 for( ; (i+2UL) <= M; i+=2UL )
2121 const size_t kbegin( ( IsUpper<MT4>::value )
2122 ?( ( IsLower<MT5>::value )
2123 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2124 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2125 :( IsLower<MT5>::value ? j : 0UL ) );
2126 const size_t kend( ( IsLower<MT4>::value )
2127 ?( ( IsUpper<MT5>::value )
2128 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
2129 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2130 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
2132 IntrinsicType xmm1( (~C).
load(i ,j ) );
2134 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
2137 for(
size_t k=kbegin; k<kend; ++k ) {
2138 const IntrinsicType a1(
set( A(i ,k) ) );
2139 const IntrinsicType a2(
set( A(i+1UL,k) ) );
2140 const IntrinsicType b1( B.load(k,j ) );
2141 const IntrinsicType b2( B.load(k,j+
IT::size) );
2142 xmm1 = xmm1 + a1 * b1;
2143 xmm2 = xmm2 + a1 * b2;
2144 xmm3 = xmm3 + a2 * b1;
2145 xmm4 = xmm4 + a2 * b2;
2148 (~C).
store( i , j , xmm1 );
2150 (~C).
store( i+1UL, j , xmm3 );
2156 const size_t kbegin( ( IsUpper<MT4>::value )
2157 ?( ( IsLower<MT5>::value )
2158 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2159 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2160 :( IsLower<MT5>::value ? j : 0UL ) );
2161 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
2163 IntrinsicType xmm1( (~C).
load(i,j ) );
2166 for(
size_t k=kbegin; k<kend; ++k ) {
2167 const IntrinsicType a1(
set( A(i,k) ) );
2168 xmm1 = xmm1 + a1 * B.load(k,j );
2169 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
2172 (~C).
store( i, j , xmm1 );
2181 for( ; (i+2UL) <= M; i+=2UL )
2183 const size_t kbegin( ( IsUpper<MT4>::value )
2184 ?( ( IsLower<MT5>::value )
2185 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2186 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2187 :( IsLower<MT5>::value ? j : 0UL ) );
2188 const size_t kend( ( IsLower<MT4>::value )
2189 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
2192 IntrinsicType xmm1( (~C).
load(i ,j) );
2193 IntrinsicType xmm2( (~C).
load(i+1UL,j) );
2195 for(
size_t k=kbegin; k<kend; ++k ) {
2196 const IntrinsicType b1( B.load(k,j) );
2197 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
2198 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
2201 (~C).
store( i , j, xmm1 );
2202 (~C).
store( i+1UL, j, xmm2 );
2207 const size_t kbegin( ( IsUpper<MT4>::value )
2208 ?( ( IsLower<MT5>::value )
2209 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2210 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2211 :( IsLower<MT5>::value ? j : 0UL ) );
2213 IntrinsicType xmm1( (~C).
load(i,j) );
2215 for(
size_t k=kbegin; k<K; ++k ) {
2216 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
2219 (~C).
store( i, j, xmm1 );
// Vectorized small-matrix addition-assignment kernel for targets of type
// DenseMatrix<MT3,true>. Only enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. NOTE(review): the remaining template parameters and the bodies of the
// branches below are not part of this excerpt.
2241 template<
typename MT3
2244 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2245 selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
// Branch selection: first on which operand type is resizable (compile-time
// traits), then on a run-time comparison of the element counts of A and B.
2252 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2256 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2260 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Large-matrix addition-assignment kernel used when the vectorized default
// kernel is NOT applicable (DisableIf< UseVectorizedDefaultKernel >).
// It simply forwards to the default addition-assignment kernel.
2286 template<
typename MT3
2289 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2290 selectLargeAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2292 selectDefaultAddAssignKernel( C, A, B );
// Vectorized, cache-blocked large-matrix addition-assignment kernel
// (C += A * B) for targets of type DenseMatrix<MT3,false>. Only enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// NOTE(review): this excerpt omits several original lines (braces, the j loops
// and the declarations of i, j, j1, j2, j3); the comments below describe only
// what the visible statements do.
2312 template<
typename MT3
2315 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2316 selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
2318 typedef IntrinsicTrait<ElementType> IT;
// Problem dimensions: C is M x N, the contraction runs over K.
2320 const size_t M( A.rows() );
2321 const size_t N( B.columns() );
2322 const size_t K( A.columns() );
// Fixed block sizes for the three loop dimensions.
2324 const size_t iblock( 64UL );
2325 const size_t jblock( 128UL );
2326 const size_t kblock( 128UL );
// Outer blocking loops: columns of C (jj), rows of C (ii), contraction index (kk).
// jend/iend/ktmp clamp each block to the matrix bounds.
2328 for(
size_t jj=0UL; jj<N; jj+=jblock )
2330 const size_t jend(
min( jj+jblock, N ) );
2332 for(
size_t ii=0UL; ii<M; ii+=iblock )
2334 const size_t iend(
min( ii+iblock, M ) );
2336 for(
size_t kk=0UL; kk<K; kk+=kblock )
2338 const size_t ktmp(
min( kk+kblock, K ) );
// Two rows (i, i+1) times four SIMD column groups (j, j1, j2, j3) per iteration.
2350 for( ; (i+2UL) <= iend; i+=2UL )
// kbegin: start of the k range inside the current block; raised to i if A is
// upper and to j if B is lower, never below kk.
2352 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2353 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
// kend: end of the k range; limited to i+2 if A is lower and to j+IT::size*4
// if B is upper, never beyond ktmp.
2354 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
2355 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
// Load the current values of C so that the products are accumulated onto them.
2357 IntrinsicType xmm1( (~C).
load(i ,j ) );
2358 IntrinsicType xmm2( (~C).
load(i ,j1) );
2359 IntrinsicType xmm3( (~C).
load(i ,j2) );
2360 IntrinsicType xmm4( (~C).
load(i ,j3) );
2361 IntrinsicType xmm5( (~C).
load(i+1UL,j ) );
2362 IntrinsicType xmm6( (~C).
load(i+1UL,j1) );
2363 IntrinsicType xmm7( (~C).
load(i+1UL,j2) );
2364 IntrinsicType xmm8( (~C).
load(i+1UL,j3) );
// Broadcast one element of A per row and multiply it with four vectors of B.
2366 for(
size_t k=kbegin; k<kend; ++k ) {
2367 const IntrinsicType a1(
set( A(i ,k) ) );
2368 const IntrinsicType a2(
set( A(i+1UL,k) ) );
2369 const IntrinsicType b1( B.load(k,j ) );
2370 const IntrinsicType b2( B.load(k,j1) );
2371 const IntrinsicType b3( B.load(k,j2) );
2372 const IntrinsicType b4( B.load(k,j3) );
2373 xmm1 = xmm1 + a1 * b1;
2374 xmm2 = xmm2 + a1 * b2;
2375 xmm3 = xmm3 + a1 * b3;
2376 xmm4 = xmm4 + a1 * b4;
2377 xmm5 = xmm5 + a2 * b1;
2378 xmm6 = xmm6 + a2 * b2;
2379 xmm7 = xmm7 + a2 * b3;
2380 xmm8 = xmm8 + a2 * b4;
// Write the accumulated results back to C.
2383 (~C).
store( i , j , xmm1 );
2384 (~C).
store( i , j1, xmm2 );
2385 (~C).
store( i , j2, xmm3 );
2386 (~C).
store( i , j3, xmm4 );
2387 (~C).
store( i+1UL, j , xmm5 );
2388 (~C).
store( i+1UL, j1, xmm6 );
2389 (~C).
store( i+1UL, j2, xmm7 );
2390 (~C).
store( i+1UL, j3, xmm8 );
// Single row i times four SIMD column groups (the enclosing condition is not
// visible in this excerpt; presumably the leftover row of the loop above).
2395 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2396 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
2397 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2398 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
2400 IntrinsicType xmm1( (~C).
load(i,j ) );
2401 IntrinsicType xmm2( (~C).
load(i,j1) );
2402 IntrinsicType xmm3( (~C).
load(i,j2) );
2403 IntrinsicType xmm4( (~C).
load(i,j3) );
2405 for(
size_t k=kbegin; k<kend; ++k ) {
2406 const IntrinsicType a1(
set( A(i,k) ) );
2407 xmm1 = xmm1 + a1 * B.load(k,j );
2408 xmm2 = xmm2 + a1 * B.load(k,j1);
2409 xmm3 = xmm3 + a1 * B.load(k,j2);
2410 xmm4 = xmm4 + a1 * B.load(k,j3);
2413 (~C).
store( i, j , xmm1 );
2414 (~C).
store( i, j1, xmm2 );
2415 (~C).
store( i, j2, xmm3 );
2416 (~C).
store( i, j3, xmm4 );
// Four rows (i .. i+3) times two SIMD column groups (j, j1) per iteration.
2426 for( ; (i+4UL) <= iend; i+=4UL )
2428 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2429 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
2430 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
2431 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
2433 IntrinsicType xmm1( (~C).
load(i ,j ) );
2434 IntrinsicType xmm2( (~C).
load(i ,j1) );
2435 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
2436 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
2437 IntrinsicType xmm5( (~C).
load(i+2UL,j ) );
2438 IntrinsicType xmm6( (~C).
load(i+2UL,j1) );
2439 IntrinsicType xmm7( (~C).
load(i+3UL,j ) );
2440 IntrinsicType xmm8( (~C).
load(i+3UL,j1) );
2442 for(
size_t k=kbegin; k<kend; ++k ) {
2443 const IntrinsicType a1(
set( A(i ,k) ) );
2444 const IntrinsicType a2(
set( A(i+1UL,k) ) );
2445 const IntrinsicType a3(
set( A(i+2UL,k) ) );
2446 const IntrinsicType a4(
set( A(i+3UL,k) ) );
2447 const IntrinsicType b1( B.load(k,j ) );
2448 const IntrinsicType b2( B.load(k,j1) );
2449 xmm1 = xmm1 + a1 * b1;
2450 xmm2 = xmm2 + a1 * b2;
2451 xmm3 = xmm3 + a2 * b1;
2452 xmm4 = xmm4 + a2 * b2;
2453 xmm5 = xmm5 + a3 * b1;
2454 xmm6 = xmm6 + a3 * b2;
2455 xmm7 = xmm7 + a4 * b1;
2456 xmm8 = xmm8 + a4 * b2;
2459 (~C).
store( i , j , xmm1 );
2460 (~C).
store( i , j1, xmm2 );
2461 (~C).
store( i+1UL, j , xmm3 );
2462 (~C).
store( i+1UL, j1, xmm4 );
2463 (~C).
store( i+2UL, j , xmm5 );
2464 (~C).
store( i+2UL, j1, xmm6 );
2465 (~C).
store( i+3UL, j , xmm7 );
2466 (~C).
store( i+3UL, j1, xmm8 );
// Two rows times two SIMD column groups for the rows left by the 4-row loop.
2469 for( ; (i+2UL) <= iend; i+=2UL )
2471 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2472 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
2473 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
2474 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
2476 IntrinsicType xmm1( (~C).
load(i ,j ) );
2477 IntrinsicType xmm2( (~C).
load(i ,j1) );
2478 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
2479 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
2481 for(
size_t k=kbegin; k<kend; ++k ) {
2482 const IntrinsicType a1(
set( A(i ,k) ) );
2483 const IntrinsicType a2(
set( A(i+1UL,k) ) );
2484 const IntrinsicType b1( B.load(k,j ) );
2485 const IntrinsicType b2( B.load(k,j1) );
2486 xmm1 = xmm1 + a1 * b1;
2487 xmm2 = xmm2 + a1 * b2;
2488 xmm3 = xmm3 + a2 * b1;
2489 xmm4 = xmm4 + a2 * b2;
2492 (~C).
store( i , j , xmm1 );
2493 (~C).
store( i , j1, xmm2 );
2494 (~C).
store( i+1UL, j , xmm3 );
2495 (~C).
store( i+1UL, j1, xmm4 );
// Single row i times two SIMD column groups (enclosing condition not visible here).
2500 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2501 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
2502 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2503 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
2505 IntrinsicType xmm1( (~C).
load(i,j ) );
2506 IntrinsicType xmm2( (~C).
load(i,j1) );
2508 for(
size_t k=kbegin; k<kend; ++k ) {
2509 const IntrinsicType a1(
set( A(i,k) ) );
2510 xmm1 = xmm1 + a1 * B.load(k,j );
2511 xmm2 = xmm2 + a1 * B.load(k,j1);
2514 (~C).
store( i, j , xmm1 );
2515 (~C).
store( i, j1, xmm2 );
// One row times one SIMD column group: every row of the current i block.
2521 for(
size_t i=ii; i<iend; ++i )
2523 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
2524 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
2525 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2526 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
2528 IntrinsicType xmm1( (~C).
load(i,j) );
2530 for(
size_t k=kbegin; k<kend; ++k ) {
2531 const IntrinsicType a1(
set( A(i,k) ) );
2532 xmm1 = xmm1 + a1 * B.load(k,j);
2535 (~C).
store( i, j, xmm1 );
// Large-matrix addition-assignment kernel for targets of type
// DenseMatrix<MT3,true> when the vectorized default kernel is enabled.
// There is no dedicated large kernel for this case: the call is forwarded to
// the small addition-assignment kernel on the unwrapped target (~C).
2559 template<
typename MT3
2562 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2563 selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
2565 selectSmallAddAssignKernel( ~C, A, B );
// BLAS-level addition-assignment kernel selected when UseDefaultKernel<MT3,MT4,MT5>
// holds: no BLAS routine is used, the call is forwarded to the large
// addition-assignment kernel.
2584 template<
typename MT3
2587 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2588 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2590 selectLargeAddAssignKernel( C, A, B );
// BLAS-based addition-assignment kernel for the single precision case
// (enabled by UseSinglePrecisionKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
2610 template<
typename MT3
2613 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2614 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
2616 if( IsTriangular<MT4>::value ) {
2618 strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// B triangular: triangular multiplication with B applied from the right.
2621 else if( IsTriangular<MT5>::value ) {
2623 strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// General case: matrix-matrix multiplication with both scalar factors set to 1.
2627 sgemm( C, A, B, 1.0F, 1.0F );
// BLAS-based addition-assignment kernel for the double precision case
// (enabled by UseDoublePrecisionKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
2649 template<
typename MT3
2652 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2653 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
2655 if( IsTriangular<MT4>::value ) {
2657 dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// B triangular: triangular multiplication with B applied from the right.
2660 else if( IsTriangular<MT5>::value ) {
2662 dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// General case: matrix-matrix multiplication with both scalar factors set to 1.
2666 dgemm( C, A, B, 1.0, 1.0 );
// BLAS-based addition-assignment kernel for the single precision complex case
// (enabled by UseSinglePrecisionComplexKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
2688 template<
typename MT3
2691 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2692 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
2694 if( IsTriangular<MT4>::value ) {
2696 ctrmm( tmp, A, CblasLeft,
2697 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2698 complex<float>( 1.0F, 0.0F ) );
// B triangular: triangular multiplication with B applied from the right.
2701 else if( IsTriangular<MT5>::value ) {
2703 ctrmm( tmp, B, CblasRight,
2704 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2705 complex<float>( 1.0F, 0.0F ) );
// General case: matrix-matrix multiplication with both scalar factors set to (1,0).
2709 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-based addition-assignment kernel for the double precision complex case
// (enabled by UseDoublePrecisionComplexKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
2731 template<
typename MT3
2734 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2735 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
2737 if( IsTriangular<MT4>::value ) {
2739 ztrmm( tmp, A, CblasLeft,
2740 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2741 complex<double>( 1.0, 0.0 ) );
// B triangular: triangular multiplication with B applied from the right.
2744 else if( IsTriangular<MT5>::value ) {
2746 ztrmm( tmp, B, CblasRight,
2747 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2748 complex<double>( 1.0, 0.0 ) );
// General case: matrix-matrix multiplication with both scalar factors set to (1,0).
2752 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Friend function that is only enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt; judging by its position it is presumably the restructuring
// addition assignment -- confirm against the full file.
2774 template<
typename MT >
2775 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Distinguishes "both operands symmetric" from "only the left operand symmetric";
// the remaining case is handled on a line not shown here.
2785 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2787 else if( IsSymmetric<MT1>::value )
// Friend function that is disabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): its name and parameter list are outside this excerpt; since it
// dispatches to selectSubAssignKernel it is presumably the subtraction
// assignment of the multiplication expression -- confirm against the full file.
2812 template<
typename MT
2814 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Guard for empty operands: target without rows/columns or a zero-sized
// contraction dimension (the body of this branch is not shown).
2822 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Evaluate both operands via serial() before running the kernel.
2826 LT A(
serial( rhs.lhs_ ) );
2827 RT B(
serial( rhs.rhs_ ) );
2836 DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Entry point for the subtraction-assignment kernels (C -= A * B): chooses
// between the small kernel and the BLAS kernel.
// NOTE(review): the second half of the condition and the 'else' line are not
// part of this excerpt.
2852 template<
typename MT3
2855 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A diagonal right-hand side operand is one of the conditions for the small kernel.
2857 if( ( IsDiagonal<MT5>::value ) ||
2859 selectSmallSubAssignKernel( C, A, B );
2861 selectBlasSubAssignKernel( C, A, B );
// Default (scalar) subtraction-assignment kernel C -= A * B for the case that
// neither A nor B is diagonal.
// NOTE(review): the fallback branches of the conditional bounds and the guard
// around the final update are on lines not visible in this excerpt.
2880 template<
typename MT3
2883 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
2884 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2886 const size_t M( A.rows() );
2887 const size_t N( B.columns() );
2888 const size_t K( A.columns() );
2890 for(
size_t i=0UL; i<M; ++i )
// Range of k for row i, narrowed by the (strictly) upper/lower structure of A.
2892 const size_t kbegin( ( IsUpper<MT4>::value )
2893 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2895 const size_t kend( ( IsLower<MT4>::value )
2896 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2900 for(
size_t k=kbegin; k<kend; ++k )
// Range of j for row k of B, narrowed by the (strictly) upper/lower structure of B.
2902 const size_t jbegin( ( IsUpper<MT5>::value )
2903 ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
2905 const size_t jend( ( IsLower<MT5>::value )
2906 ?( IsStrictlyLower<MT5>::value ? k : k+1UL )
// jpos = jbegin + (jnum with the lowest bit cleared): end of the part of the
// range that can be processed two columns at a time.
2910 const size_t jnum( jend - jbegin );
2911 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Loop unrolled by two over the columns.
2913 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
2914 C(i,j ) -= A(i,k) * B(k,j );
2915 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update of the column at jpos (presumably the leftover column of an odd-sized range).
2918 C(i,jpos) -= A(i,k) * B(k,jpos);
// Default (scalar) subtraction-assignment kernel C -= A * B for a non-diagonal
// A and a diagonal B: only the diagonal element B(j,j) contributes to column j.
// NOTE(review): the fallback branches of the conditional bounds and the guard
// around the final update are on lines not visible in this excerpt.
2940 template<
typename MT3
2943 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
2944 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2948 const size_t M( A.rows() );
2949 const size_t N( B.columns() );
2951 for(
size_t i=0UL; i<M; ++i )
// Column range for row i, narrowed by the (strictly) upper/lower structure of A.
2953 const size_t jbegin( ( IsUpper<MT4>::value )
2954 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2956 const size_t jend( ( IsLower<MT4>::value )
2957 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// jpos = jbegin + (jnum with the lowest bit cleared): end of the part of the
// range that can be processed two columns at a time.
2961 const size_t jnum( jend - jbegin );
2962 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Loop unrolled by two over the columns.
2964 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
2965 C(i,j ) -= A(i,j ) * B(j ,j );
2966 C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
// Update of the column at jpos (presumably the leftover column of an odd-sized range).
2969 C(i,jpos) -= A(i,jpos) * B(jpos,jpos);
// Default (scalar) subtraction-assignment kernel C -= A * B for a diagonal A
// and a non-diagonal B: row i of B is scaled by the diagonal element A(i,i).
// NOTE(review): the fallback branches of the conditional bounds and the guard
// around the final update are on lines not visible in this excerpt.
2990 template<
typename MT3
2993 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
2994 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2998 const size_t M( A.rows() );
2999 const size_t N( B.columns() );
3001 for(
size_t i=0UL; i<M; ++i )
// Column range for row i, narrowed by the (strictly) upper/lower structure of B.
3003 const size_t jbegin( ( IsUpper<MT5>::value )
3004 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
3006 const size_t jend( ( IsLower<MT5>::value )
3007 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
// jpos = jbegin + (jnum with the lowest bit cleared): end of the part of the
// range that can be processed two columns at a time.
3011 const size_t jnum( jend - jbegin );
3012 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
// Loop unrolled by two over the columns.
3014 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
3015 C(i,j ) -= A(i,i) * B(i,j );
3016 C(i,j+1UL) -= A(i,i) * B(i,j+1UL);
// Update of the column at jpos (presumably the leftover column of an odd-sized range).
3019 C(i,jpos) -= A(i,i) * B(i,jpos);
// Default subtraction-assignment kernel C -= A * B for the case that both A
// and B are diagonal: only the diagonal elements of C are updated.
3040 template<
typename MT3
3043 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
3044 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// One multiplication per diagonal element.
3048 for(
size_t i=0UL; i<A.rows(); ++i ) {
3049 C(i,i) -= A(i,i) * B(i,i);
// Small-matrix subtraction-assignment kernel used when the vectorized default
// kernel is NOT applicable (DisableIf< UseVectorizedDefaultKernel >).
// It simply forwards to the default subtraction-assignment kernel.
3069 template<
typename MT3
3072 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3073 selectSmallSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
3075 selectDefaultSubAssignKernel( C, A, B );
// Vectorized small-matrix subtraction-assignment kernel (C -= A * B) for
// targets of type DenseMatrix<MT3,false>. Only enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// NOTE(review): this excerpt omits several original lines (braces, the j loops,
// the declarations of i and j, and most loads/stores of the registers
// xmm2..xmm8); the comments below describe only what the visible statements do.
3095 template<
typename MT3
3098 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3099 selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
3101 typedef IntrinsicTrait<ElementType> IT;
// Problem dimensions: C is M x N, the contraction runs over K.
3103 const size_t M( A.rows() );
3104 const size_t N( B.columns() );
3105 const size_t K( A.columns() );
// One row at a time, eight SIMD vectors (j .. j+IT::size*7) per row.
3110 for(
size_t i=0UL; i<M; ++i )
// kbegin/kend restrict the contraction range according to the (strictly)
// upper/lower structure of A and B; without such structure the range is [0,K).
3112 const size_t kbegin( ( IsUpper<MT4>::value )
3113 ?( ( IsLower<MT5>::value )
3114 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3115 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3116 :( IsLower<MT5>::value ? j : 0UL ) );
3117 const size_t kend( ( IsLower<MT4>::value )
3118 ?( ( IsUpper<MT5>::value )
3119 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
3120 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3121 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
// Start from the current values of C so that the products are subtracted from them.
3123 IntrinsicType xmm1( (~C).
load(i,j ) );
// Broadcast A(i,k) and subtract its product with eight vectors of row k of B.
3132 for(
size_t k=kbegin; k<kend; ++k ) {
3133 const IntrinsicType a1(
set( A(i,k) ) );
3134 xmm1 = xmm1 - a1 * B.load(k,j );
3135 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size );
3136 xmm3 = xmm3 - a1 * B.load(k,j+
IT::size*2UL);
3137 xmm4 = xmm4 - a1 * B.load(k,j+
IT::size*3UL);
3138 xmm5 = xmm5 - a1 * B.load(k,j+
IT::size*4UL);
3139 xmm6 = xmm6 - a1 * B.load(k,j+
IT::size*5UL);
3140 xmm7 = xmm7 - a1 * B.load(k,j+
IT::size*6UL);
3141 xmm8 = xmm8 - a1 * B.load(k,j+
IT::size*7UL);
3144 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) times four SIMD vectors per iteration.
3159 for( ; (i+2UL) <= M; i+=2UL )
3161 const size_t kbegin( ( IsUpper<MT4>::value )
3162 ?( ( IsLower<MT5>::value )
3163 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3164 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3165 :( IsLower<MT5>::value ? j : 0UL ) );
3166 const size_t kend( ( IsLower<MT4>::value )
3167 ?( ( IsUpper<MT5>::value )
3168 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
3169 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3170 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
3172 IntrinsicType xmm1( (~C).
load(i ,j ) );
3176 IntrinsicType xmm5( (~C).
load(i+1UL,j ) );
3181 for(
size_t k=kbegin; k<kend; ++k ) {
3182 const IntrinsicType a1(
set( A(i ,k) ) );
3183 const IntrinsicType a2(
set( A(i+1UL,k) ) );
3184 const IntrinsicType b1( B.load(k,j ) );
3185 const IntrinsicType b2( B.load(k,j+
IT::size ) );
3186 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
3187 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
3188 xmm1 = xmm1 - a1 * b1;
3189 xmm2 = xmm2 - a1 * b2;
3190 xmm3 = xmm3 - a1 * b3;
3191 xmm4 = xmm4 - a1 * b4;
3192 xmm5 = xmm5 - a2 * b1;
3193 xmm6 = xmm6 - a2 * b2;
3194 xmm7 = xmm7 - a2 * b3;
3195 xmm8 = xmm8 - a2 * b4;
3198 (~C).
store( i , j , xmm1 );
3202 (~C).
store( i+1UL, j , xmm5 );
// Single row i times four SIMD vectors (enclosing condition not visible here;
// presumably the leftover row of the two-row loop above).
3210 const size_t kbegin( ( IsUpper<MT4>::value )
3211 ?( ( IsLower<MT5>::value )
3212 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3213 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3214 :( IsLower<MT5>::value ? j : 0UL ) );
3215 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
3217 IntrinsicType xmm1( (~C).
load(i,j ) );
3222 for(
size_t k=kbegin; k<kend; ++k ) {
3223 const IntrinsicType a1(
set( A(i,k) ) );
3224 xmm1 = xmm1 - a1 * B.load(k,j );
3225 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size );
3226 xmm3 = xmm3 - a1 * B.load(k,j+
IT::size*2UL);
3227 xmm4 = xmm4 - a1 * B.load(k,j+
IT::size*3UL);
3230 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) times two SIMD vectors per iteration.
3241 for( ; (i+2UL) <= M; i+=2UL )
3243 const size_t kbegin( ( IsUpper<MT4>::value )
3244 ?( ( IsLower<MT5>::value )
3245 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3246 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3247 :( IsLower<MT5>::value ? j : 0UL ) );
3248 const size_t kend( ( IsLower<MT4>::value )
3249 ?( ( IsUpper<MT5>::value )
3250 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
3251 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3252 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
3254 IntrinsicType xmm1( (~C).
load(i ,j ) );
3256 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
3259 for(
size_t k=kbegin; k<kend; ++k ) {
3260 const IntrinsicType a1(
set( A(i ,k) ) );
3261 const IntrinsicType a2(
set( A(i+1UL,k) ) );
3262 const IntrinsicType b1( B.load(k,j ) );
3263 const IntrinsicType b2( B.load(k,j+
IT::size) );
3264 xmm1 = xmm1 - a1 * b1;
3265 xmm2 = xmm2 - a1 * b2;
3266 xmm3 = xmm3 - a2 * b1;
3267 xmm4 = xmm4 - a2 * b2;
3270 (~C).
store( i , j , xmm1 );
3272 (~C).
store( i+1UL, j , xmm3 );
// Single row i times two SIMD vectors (enclosing condition not visible here).
3278 const size_t kbegin( ( IsUpper<MT4>::value )
3279 ?( ( IsLower<MT5>::value )
3280 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3281 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3282 :( IsLower<MT5>::value ? j : 0UL ) );
3283 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
3285 IntrinsicType xmm1( (~C).
load(i,j ) );
3288 for(
size_t k=kbegin; k<kend; ++k ) {
3289 const IntrinsicType a1(
set( A(i,k) ) );
3290 xmm1 = xmm1 - a1 * B.load(k,j );
3291 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size);
3294 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) times a single SIMD vector per iteration.
3303 for( ; (i+2UL) <= M; i+=2UL )
3305 const size_t kbegin( ( IsUpper<MT4>::value )
3306 ?( ( IsLower<MT5>::value )
3307 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3308 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3309 :( IsLower<MT5>::value ? j : 0UL ) );
3310 const size_t kend( ( IsLower<MT4>::value )
3311 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
3314 IntrinsicType xmm1( (~C).
load(i ,j) );
3315 IntrinsicType xmm2( (~C).
load(i+1UL,j) );
3317 for(
size_t k=kbegin; k<kend; ++k ) {
3318 const IntrinsicType b1( B.load(k,j) );
3319 xmm1 = xmm1 -
set( A(i ,k) ) * b1;
3320 xmm2 = xmm2 -
set( A(i+1UL,k) ) * b1;
3323 (~C).
store( i , j, xmm1 );
3324 (~C).
store( i+1UL, j, xmm2 );
// Single row i times a single SIMD vector; here the contraction runs up to K.
3329 const size_t kbegin( ( IsUpper<MT4>::value )
3330 ?( ( IsLower<MT5>::value )
3331 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3332 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3333 :( IsLower<MT5>::value ? j : 0UL ) );
3335 IntrinsicType xmm1( (~C).
load(i,j) );
3337 for(
size_t k=kbegin; k<K; ++k ) {
3338 xmm1 = xmm1 -
set( A(i,k) ) * B.load(k,j);
3341 (~C).
store( i, j, xmm1 );
// Vectorized small-matrix subtraction-assignment kernel for targets of type
// DenseMatrix<MT3,true>. Only enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. NOTE(review): the remaining template parameters and the bodies of the
// branches below are not part of this excerpt.
3363 template<
typename MT3
3366 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3367 selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
// Branch selection: first on which operand type is resizable (compile-time
// traits), then on a run-time comparison of the element counts of A and B.
3374 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3378 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3382 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Large-matrix subtraction-assignment kernel used when the vectorized default
// kernel is NOT applicable (DisableIf< UseVectorizedDefaultKernel >).
// It simply forwards to the default subtraction-assignment kernel.
3408 template<
typename MT3
3411 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3412 selectLargeSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
3414 selectDefaultSubAssignKernel( C, A, B );
// Vectorized, cache-blocked large-matrix subtraction-assignment kernel
// (C -= A * B) for targets of type DenseMatrix<MT3,false>. Only enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// NOTE(review): this excerpt omits several original lines (braces, the j loops
// and the declarations of i, j, j1, j2, j3); the comments below describe only
// what the visible statements do.
3434 template<
typename MT3
3437 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3438 selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
3440 typedef IntrinsicTrait<ElementType> IT;
// Problem dimensions: C is M x N, the contraction runs over K.
3442 const size_t M( A.rows() );
3443 const size_t N( B.columns() );
3444 const size_t K( A.columns() );
// Fixed block sizes for the three loop dimensions.
3446 const size_t iblock( 64UL );
3447 const size_t jblock( 128UL );
3448 const size_t kblock( 128UL );
// Outer blocking loops: columns of C (jj), rows of C (ii), contraction index (kk).
// jend/iend/ktmp clamp each block to the matrix bounds.
3450 for(
size_t jj=0UL; jj<N; jj+=jblock )
3452 const size_t jend(
min( jj+jblock, N ) );
3454 for(
size_t ii=0UL; ii<M; ii+=iblock )
3456 const size_t iend(
min( ii+iblock, M ) );
3458 for(
size_t kk=0UL; kk<K; kk+=kblock )
3460 const size_t ktmp(
min( kk+kblock, K ) );
// Two rows (i, i+1) times four SIMD column groups (j, j1, j2, j3) per iteration.
3472 for( ; (i+2UL) <= iend; i+=2UL )
// kbegin: start of the k range inside the current block; raised to i if A is
// upper and to j if B is lower, never below kk.
3474 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3475 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
// kend: end of the k range; limited to i+2 if A is lower and to j+IT::size*4
// if B is upper, never beyond ktmp.
3476 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
3477 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
// Load the current values of C so that the products are subtracted from them.
3479 IntrinsicType xmm1( (~C).
load(i ,j ) );
3480 IntrinsicType xmm2( (~C).
load(i ,j1) );
3481 IntrinsicType xmm3( (~C).
load(i ,j2) );
3482 IntrinsicType xmm4( (~C).
load(i ,j3) );
3483 IntrinsicType xmm5( (~C).
load(i+1UL,j ) );
3484 IntrinsicType xmm6( (~C).
load(i+1UL,j1) );
3485 IntrinsicType xmm7( (~C).
load(i+1UL,j2) );
3486 IntrinsicType xmm8( (~C).
load(i+1UL,j3) );
// Broadcast one element of A per row and multiply it with four vectors of B.
3488 for(
size_t k=kbegin; k<kend; ++k ) {
3489 const IntrinsicType a1(
set( A(i ,k) ) );
3490 const IntrinsicType a2(
set( A(i+1UL,k) ) );
3491 const IntrinsicType b1( B.load(k,j ) );
3492 const IntrinsicType b2( B.load(k,j1) );
3493 const IntrinsicType b3( B.load(k,j2) );
3494 const IntrinsicType b4( B.load(k,j3) );
3495 xmm1 = xmm1 - a1 * b1;
3496 xmm2 = xmm2 - a1 * b2;
3497 xmm3 = xmm3 - a1 * b3;
3498 xmm4 = xmm4 - a1 * b4;
3499 xmm5 = xmm5 - a2 * b1;
3500 xmm6 = xmm6 - a2 * b2;
3501 xmm7 = xmm7 - a2 * b3;
3502 xmm8 = xmm8 - a2 * b4;
// Write the updated results back to C.
3505 (~C).
store( i , j , xmm1 );
3506 (~C).
store( i , j1, xmm2 );
3507 (~C).
store( i , j2, xmm3 );
3508 (~C).
store( i , j3, xmm4 );
3509 (~C).
store( i+1UL, j , xmm5 );
3510 (~C).
store( i+1UL, j1, xmm6 );
3511 (~C).
store( i+1UL, j2, xmm7 );
3512 (~C).
store( i+1UL, j3, xmm8 );
// Single row i times four SIMD column groups (the enclosing condition is not
// visible in this excerpt; presumably the leftover row of the loop above).
3517 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3518 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
3519 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3520 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
3522 IntrinsicType xmm1( (~C).
load(i,j ) );
3523 IntrinsicType xmm2( (~C).
load(i,j1) );
3524 IntrinsicType xmm3( (~C).
load(i,j2) );
3525 IntrinsicType xmm4( (~C).
load(i,j3) );
3527 for(
size_t k=kbegin; k<kend; ++k ) {
3528 const IntrinsicType a1(
set( A(i,k) ) );
3529 xmm1 = xmm1 - a1 * B.load(k,j );
3530 xmm2 = xmm2 - a1 * B.load(k,j1);
3531 xmm3 = xmm3 - a1 * B.load(k,j2);
3532 xmm4 = xmm4 - a1 * B.load(k,j3);
3535 (~C).
store( i, j , xmm1 );
3536 (~C).
store( i, j1, xmm2 );
3537 (~C).
store( i, j2, xmm3 );
3538 (~C).
store( i, j3, xmm4 );
// Four rows (i .. i+3) times two SIMD column groups (j, j1) per iteration.
3548 for( ; (i+4UL) <= iend; i+=4UL )
3550 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3551 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
3552 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
3553 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
3555 IntrinsicType xmm1( (~C).
load(i ,j ) );
3556 IntrinsicType xmm2( (~C).
load(i ,j1) );
3557 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
3558 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
3559 IntrinsicType xmm5( (~C).
load(i+2UL,j ) );
3560 IntrinsicType xmm6( (~C).
load(i+2UL,j1) );
3561 IntrinsicType xmm7( (~C).
load(i+3UL,j ) );
3562 IntrinsicType xmm8( (~C).
load(i+3UL,j1) );
3564 for(
size_t k=kbegin; k<kend; ++k ) {
3565 const IntrinsicType a1(
set( A(i ,k) ) );
3566 const IntrinsicType a2(
set( A(i+1UL,k) ) );
3567 const IntrinsicType a3(
set( A(i+2UL,k) ) );
3568 const IntrinsicType a4(
set( A(i+3UL,k) ) );
3569 const IntrinsicType b1( B.load(k,j ) );
3570 const IntrinsicType b2( B.load(k,j1) );
3571 xmm1 = xmm1 - a1 * b1;
3572 xmm2 = xmm2 - a1 * b2;
3573 xmm3 = xmm3 - a2 * b1;
3574 xmm4 = xmm4 - a2 * b2;
3575 xmm5 = xmm5 - a3 * b1;
3576 xmm6 = xmm6 - a3 * b2;
3577 xmm7 = xmm7 - a4 * b1;
3578 xmm8 = xmm8 - a4 * b2;
3581 (~C).
store( i , j , xmm1 );
3582 (~C).
store( i , j1, xmm2 );
3583 (~C).
store( i+1UL, j , xmm3 );
3584 (~C).
store( i+1UL, j1, xmm4 );
3585 (~C).
store( i+2UL, j , xmm5 );
3586 (~C).
store( i+2UL, j1, xmm6 );
3587 (~C).
store( i+3UL, j , xmm7 );
3588 (~C).
store( i+3UL, j1, xmm8 );
// Two rows times two SIMD column groups for the rows left by the 4-row loop.
3591 for( ; (i+2UL) <= iend; i+=2UL )
3593 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3594 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
3595 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
3596 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
3598 IntrinsicType xmm1( (~C).
load(i ,j ) );
3599 IntrinsicType xmm2( (~C).
load(i ,j1) );
3600 IntrinsicType xmm3( (~C).
load(i+1UL,j ) );
3601 IntrinsicType xmm4( (~C).
load(i+1UL,j1) );
3603 for(
size_t k=kbegin; k<kend; ++k ) {
3604 const IntrinsicType a1(
set( A(i ,k) ) );
3605 const IntrinsicType a2(
set( A(i+1UL,k) ) );
3606 const IntrinsicType b1( B.load(k,j ) );
3607 const IntrinsicType b2( B.load(k,j1) );
3608 xmm1 = xmm1 - a1 * b1;
3609 xmm2 = xmm2 - a1 * b2;
3610 xmm3 = xmm3 - a2 * b1;
3611 xmm4 = xmm4 - a2 * b2;
3614 (~C).
store( i , j , xmm1 );
3615 (~C).
store( i , j1, xmm2 );
3616 (~C).
store( i+1UL, j , xmm3 );
3617 (~C).
store( i+1UL, j1, xmm4 );
// Single row i times two SIMD column groups (enclosing condition not visible here).
3622 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3623 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
3624 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3625 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
3627 IntrinsicType xmm1( (~C).
load(i,j ) );
3628 IntrinsicType xmm2( (~C).
load(i,j1) );
3630 for(
size_t k=kbegin; k<kend; ++k ) {
3631 const IntrinsicType a1(
set( A(i,k) ) );
3632 xmm1 = xmm1 - a1 * B.load(k,j );
3633 xmm2 = xmm2 - a1 * B.load(k,j1);
3636 (~C).
store( i, j , xmm1 );
3637 (~C).
store( i, j1, xmm2 );
// One row times one SIMD column group: every row of the current i block.
3643 for(
size_t i=ii; i<iend; ++i )
3645 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
3646 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
3647 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3648 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
3650 IntrinsicType xmm1( (~C).
load(i,j) );
3652 for(
size_t k=kbegin; k<kend; ++k ) {
3653 const IntrinsicType a1(
set( A(i,k) ) );
3654 xmm1 = xmm1 - a1 * B.load(k,j);
3657 (~C).
store( i, j, xmm1 );
// Large-matrix subtraction-assignment kernel for targets of type
// DenseMatrix<MT3,true> when the vectorized default kernel is enabled.
// There is no dedicated large kernel for this case: the call is forwarded to
// the small subtraction-assignment kernel on the unwrapped target (~C).
3681 template<
typename MT3
3684 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3685 selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
3687 selectSmallSubAssignKernel( ~C, A, B );
// BLAS-level subtraction-assignment kernel selected when UseDefaultKernel<MT3,MT4,MT5>
// holds: no BLAS routine is used, the call is forwarded to the large
// subtraction-assignment kernel.
3706 template<
typename MT3
3709 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
3710 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
3712 selectLargeSubAssignKernel( C, A, B );
// BLAS-based subtraction-assignment kernel for the single precision case
// (enabled by UseSinglePrecisionKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
3732 template<
typename MT3
3735 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
3736 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
3738 if( IsTriangular<MT4>::value ) {
3740 strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// B triangular: triangular multiplication with B applied from the right.
3743 else if( IsTriangular<MT5>::value ) {
3745 strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
// General case: the first scalar factor is -1, which turns the product into a
// subtraction from C; the second factor is 1.
3749 sgemm( C, A, B, -1.0F, 1.0F );
// BLAS-based subtraction-assignment kernel for the double precision case
// (enabled by UseDoublePrecisionKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
3771 template<
typename MT3
3774 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
3775 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
3777 if( IsTriangular<MT4>::value ) {
3779 dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// B triangular: triangular multiplication with B applied from the right.
3782 else if( IsTriangular<MT5>::value ) {
3784 dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
// General case: the first scalar factor is -1, which turns the product into a
// subtraction from C; the second factor is 1.
3788 dgemm( C, A, B, -1.0, 1.0 );
// BLAS-based subtraction-assignment kernel for the single precision complex
// case (enabled by UseSinglePrecisionComplexKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
3810 template<
typename MT3
3813 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3814 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
3816 if( IsTriangular<MT4>::value ) {
3818 ctrmm( tmp, A, CblasLeft,
3819 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3820 complex<float>( 1.0F, 0.0F ) );
// B triangular: triangular multiplication with B applied from the right.
3823 else if( IsTriangular<MT5>::value ) {
3825 ctrmm( tmp, B, CblasRight,
3826 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3827 complex<float>( 1.0F, 0.0F ) );
// General case: the first scalar factor is (-1,0), which turns the product into
// a subtraction from C; the second factor is (1,0).
3831 cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS-based subtraction-assignment kernel for the double precision complex
// case (enabled by UseDoublePrecisionComplexKernel<MT3,MT4,MT5>).
// NOTE(review): the declaration of 'tmp' and the statements that combine it
// with C are on lines not visible in this excerpt.
3853 template<
typename MT3
3856 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3857 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// A triangular: triangular multiplication with A applied from the left.
// The scaling factor is complex<double> to match the double precision complex
// routine (and the zgemm call below); it was mistakenly complex<float> before.
3859 if( IsTriangular<MT4>::value ) {
3861 ztrmm( tmp, A, CblasLeft,
3862 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3863 complex<double>( 1.0, 0.0 ) );
// B triangular: triangular multiplication with B applied from the right,
// again with a complex<double> scaling factor.
3866 else if( IsTriangular<MT5>::value ) {
3868 ztrmm( tmp, B, CblasRight,
3869 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3870 complex<double>( 1.0, 0.0 ) );
// General case: the first scalar factor is (-1,0), which turns the product into
// a subtraction from C; the second factor is (1,0).
3874 zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Friend function that is only enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt; judging by its position it is presumably the restructuring
// subtraction assignment -- confirm against the full file.
3896 template<
typename MT >
3897 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Distinguishes "both operands symmetric" from "only the left operand symmetric";
// the remaining case is handled on a line not shown here.
3907 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3909 else if( IsSymmetric<MT1>::value )
// Friend function that is only enabled when IsEvaluationRequired<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt -- presumably one of the SMP assignment functions; confirm
// against the full file.
3944 template<
typename MT
3946 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Two separate guards: an empty target, and a zero-sized contraction dimension.
3954 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
3957 else if( rhs.lhs_.columns() == 0UL ) {
// Friend function that is only enabled when IsEvaluationRequired<MT,MT1,MT2>
// holds. NOTE(review): its name and parameter list are outside this excerpt.
3992 template<
typename MT
3994 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Type of the temporary: OppositeType if SO is set, ResultType otherwise.
3999 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// The expression is first evaluated into a temporary of that type.
4011 const TmpType tmp( rhs );
// Friend function that is only enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt.
4032 template<
typename MT >
4033 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Distinguishes "both operands symmetric" from "only the left operand symmetric";
// the remaining case is handled on a line not shown here.
4043 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4045 else if( IsSymmetric<MT1>::value )
// Friend function that is only enabled when IsEvaluationRequired<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and body are outside this
// excerpt.
4069 template<
typename MT
4071 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Guard for empty operands: target without rows/columns or a zero-sized
// contraction dimension (the body of this branch is not shown).
4079 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Friend function that is only enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt.
4113 template<
typename MT >
4114 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Distinguishes "both operands symmetric" from "only the left operand symmetric";
// the remaining case is handled on a line not shown here.
4124 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4126 else if( IsSymmetric<MT1>::value )
// Friend function that is only enabled when IsEvaluationRequired<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and body are outside this
// excerpt.
4154 template<
typename MT
4156 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Guard for empty operands: target without rows/columns or a zero-sized
// contraction dimension (the body of this branch is not shown).
4164 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Friend function that is only enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. NOTE(review): its name, parameter list and branch bodies are outside
// this excerpt.
4198 template<
typename MT >
4199 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Distinguishes "both operands symmetric" from "only the left operand symmetric";
// the remaining case is handled on a line not shown here.
4209 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4211 else if( IsSymmetric<MT1>::value )
/*!\brief Head of the DMatScalarMultExpr specialization for a scaled dense matrix/dense matrix product.
//
// The visible base list derives publicly from DenseMatrix< DMatScalarMultExpr<...>, false > and
// privately from MatScalarMultExpr and Computation. The remaining template parameters (MT2 and ST
// are used below) and the class head itself are on lines that are not part of this excerpt.
*/
4260 template<
typename MT1
4264 :
public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
4265 ,
private MatScalarMultExpr
4266 ,
private Computation
//! Shorthand for the wrapped dense matrix/dense matrix multiplication expression type.
4270 typedef DMatDMatMultExpr<MT1,MT2> MMM;
//! Compile-time flag: non-zero if the left operand MT1 is a computation or requires evaluation.
4282 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
//! Compile-time flag: non-zero if the right operand MT2 is a computation or requires evaluation.
4287 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
/*!\brief Compile-time predicate used to select the symmetry-exploiting overloads.
//
// \a value is non-zero when \a T1 is a column-major matrix and at least one of \a T2 and \a T3
// is symmetric. The overloads below use it through EnableIf/DisableIf.
*/
4297 template<
typename T1,
typename T2,
typename T3 >
4298 struct CanExploitSymmetry {
4299 enum { value = IsColumnMajorMatrix<T1>::value &&
4300 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
/*!\brief Compile-time predicate: is an intermediate evaluation of an operand required?
//
// \a value is non-zero when at least one operand needs evaluation (evaluateLeft or evaluateRight)
// and the symmetry shortcut described by CanExploitSymmetry<T1,T2,T3> does not apply.
*/
4309 template<
typename T1,
typename T2,
typename T3 >
4310 struct IsEvaluationRequired {
4311 enum { value = ( evaluateLeft || evaluateRight ) &&
4312 !CanExploitSymmetry<T1,T2,T3>::value };
/*!\brief Compile-time predicate selecting the single precision (float) BLAS kernel.
//
// Visible conditions: \a T1 offers mutable data access, \a T2 and \a T3 offer constant data access,
// neither \a T2 nor \a T3 is diagonal, all three matrix types are vectorizable and have \c float
// elements, and the scalar type \a T4 is not complex. The line opening the enum (and any leading
// condition on it) is not part of this excerpt.
*/
4321 template<
typename T1,
typename T2,
typename T3,
typename T4 >
4322 struct UseSinglePrecisionKernel {
4324 HasMutableDataAccess<T1>::value &&
4325 HasConstDataAccess<T2>::value &&
4326 HasConstDataAccess<T3>::value &&
4327 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4328 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4329 IsFloat<typename T1::ElementType>::value &&
4330 IsFloat<typename T2::ElementType>::value &&
4331 IsFloat<typename T3::ElementType>::value &&
4332 !IsComplex<T4>::value };
/*!\brief Compile-time predicate selecting the double precision (double) BLAS kernel.
//
// Visible conditions: \a T1 offers mutable data access, \a T2 and \a T3 offer constant data access,
// neither \a T2 nor \a T3 is diagonal, all three matrix types are vectorizable and have \c double
// elements, and the scalar type \a T4 is not complex. The line opening the enum (and any leading
// condition on it) is not part of this excerpt.
*/
4341 template<
typename T1,
typename T2,
typename T3,
typename T4 >
4342 struct UseDoublePrecisionKernel {
4344 HasMutableDataAccess<T1>::value &&
4345 HasConstDataAccess<T2>::value &&
4346 HasConstDataAccess<T3>::value &&
4347 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4348 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4349 IsDouble<typename T1::ElementType>::value &&
4350 IsDouble<typename T2::ElementType>::value &&
4351 IsDouble<typename T3::ElementType>::value &&
4352 !IsComplex<T4>::value };
/*!\brief Compile-time predicate selecting the single precision complex BLAS kernel.
//
// Visible conditions: \a T1 offers mutable data access, \a T2 and \a T3 offer constant data access,
// neither \a T2 nor \a T3 is diagonal, all three matrix types are vectorizable and their element
// type is exactly complex<float>. Unlike the real-valued predicates, no scalar type is inspected.
// The line opening the enum is not part of this excerpt.
*/
4361 template<
typename T1,
typename T2,
typename T3 >
4362 struct UseSinglePrecisionComplexKernel {
//! Element type all three matrices have to share.
4363 typedef complex<float> Type;
4365 HasMutableDataAccess<T1>::value &&
4366 HasConstDataAccess<T2>::value &&
4367 HasConstDataAccess<T3>::value &&
4368 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4369 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4370 IsSame<typename T1::ElementType,Type>::value &&
4371 IsSame<typename T2::ElementType,Type>::value &&
4372 IsSame<typename T3::ElementType,Type>::value };
/*!\brief Compile-time predicate selecting the double precision complex BLAS kernel.
//
// Visible conditions: \a T1 offers mutable data access, \a T2 and \a T3 offer constant data access,
// neither \a T2 nor \a T3 is diagonal, all three matrix types are vectorizable and their element
// type is exactly complex<double>. No scalar type is inspected. The line opening the enum is not
// part of this excerpt.
*/
4381 template<
typename T1,
typename T2,
typename T3 >
4382 struct UseDoublePrecisionComplexKernel {
//! Element type all three matrices have to share.
4383 typedef complex<double> Type;
4385 HasMutableDataAccess<T1>::value &&
4386 HasConstDataAccess<T2>::value &&
4387 HasConstDataAccess<T3>::value &&
4388 !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4389 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4390 IsSame<typename T1::ElementType,Type>::value &&
4391 IsSame<typename T2::ElementType,Type>::value &&
4392 IsSame<typename T3::ElementType,Type>::value };
/*!\brief Compile-time predicate selecting the non-BLAS fallback kernel.
//
// \a value is non-zero when BLAZE_BLAS_MODE is off, or when none of the four BLAS predicates
// (single/double precision, real/complex) accepts the given type combination.
*/
4400 template<
typename T1,
typename T2,
typename T3,
typename T4 >
4401 struct UseDefaultKernel {
4402 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
4403 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
4404 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
4405 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
/*!\brief Compile-time predicate selecting the vectorized (intrinsics based) default kernels.
//
// \a value is non-zero when \a T3 is not diagonal, all three matrix types are vectorizable, the
// element types of \a T1, \a T2, \a T3 and the scalar type \a T4 are all the same type, and
// IntrinsicTrait of that element type reports addition, subtraction and multiplication.
*/
4413 template<
typename T1,
typename T2,
typename T3,
typename T4 >
4414 struct UseVectorizedDefaultKernel {
4415 enum { value = !IsDiagonal<T3>::value &&
4416 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4417 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
4418 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
4419 IsSame<typename T1::ElementType,T4>::value &&
4420 IntrinsicTrait<typename T1::ElementType>::addition &&
4421 IntrinsicTrait<typename T1::ElementType>::subtraction &&
4422 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type aliases of the expression. RES, ElementType, RT1, RT2, CT1 and CT2 are declared on lines
// that are not part of this excerpt.
//! Type of this DMatScalarMultExpr instance.
4428 typedef DMatScalarMultExpr<MMM,ST,false>
This;
//! Result type of the expression: MultTrait of RES and the scalar type.
4429 typedef typename MultTrait<RES,ST>::Type
ResultType;
//! Intrinsic (SIMD) type associated with ElementType.
4433 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
//! Type of the left-hand side operand: the wrapped matrix product expression.
4438 typedef const DMatDMatMultExpr<MT1,MT2>
LeftOperand;
//! Type used for the left matrix operand: const RT1 if it has to be evaluated, CT1 otherwise.
4444 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
//! Type used for the right matrix operand: const RT2 if it has to be evaluated, CT2 otherwise.
4447 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
//! Vectorization flag: MT2 is not diagonal, both operands are vectorizable, both element types and
//! the scalar type are the same type, and that type supports intrinsic addition and multiplication.
4452 enum { vectorizable = !IsDiagonal<MT2>::value &&
4453 MT1::vectorizable && MT2::vectorizable &&
4454 IsSame<ET1,ET2>::value &&
4455 IsSame<ET1,ST>::value &&
4456 IntrinsicTrait<ET1>::addition &&
4457 IntrinsicTrait<ET1>::multiplication };
//! SMP assignment flag: neither operand needs evaluation and both operand types are SMP-assignable.
4460 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4461 !evaluateRight && MT2::smpAssignable };
/*!\brief Constructor taking the matrix product expression and the scalar factor.
//
// \param matrix The dense matrix/dense matrix multiplication expression to be scaled.
// \param scalar The scaling factor.
//
// The initializer list and body are not part of this excerpt.
*/
4470 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
/*!\brief Element access: returns element (i,j) of the wrapped product multiplied by the scalar.
//
// \param i Row index passed through to the wrapped expression.
// \param j Column index passed through to the wrapped expression.
// \return matrix_(i,j) * scalar_.
*/
4483 inline ReturnType
operator()(
size_t i,
size_t j )
const {
4486 return matrix_(i,j) * scalar_;
/*!\brief Returns the number of rows, as reported by the wrapped product expression.
*/
4495 inline size_t rows()
const {
4496 return matrix_.rows();
/*!\brief Returns the number of columns, as reported by the wrapped product expression.
*/
4505 inline size_t columns()
const {
4506 return matrix_.columns();
/*!\brief Returns whether the expression can alias with the given address.
//
// \param alias The address to check.
// \return The result of matrix_.canAlias( alias ); the scalar is not consulted.
*/
4536 template<
typename T >
4537 inline bool canAlias(
const T* alias )
const {
4538 return matrix_.canAlias( alias );
/*!\brief Returns whether the expression is aliased with the given address.
//
// \param alias The address to check.
// \return The result of matrix_.isAliased( alias ); the scalar is not consulted.
*/
4548 template<
typename T >
4549 inline bool isAliased(
const T* alias )
const {
4550 return matrix_.isAliased( alias );
4560 return matrix_.isAligned();
4570 typename MMM::LeftOperand A( matrix_.leftOperand() );
//! Left-hand side operand: the dense matrix/dense matrix multiplication expression.
4579 LeftOperand matrix_;
//! Right-hand side operand: the scalar factor (RightOperand is declared outside this excerpt).
4580 RightOperand scalar_;
/*!\brief Assignment of the scaled matrix product to a dense matrix.
//
// \param lhs The target dense matrix.
// \param rhs The scaled product expression to be assigned.
//
// Overload used when CanExploitSymmetry<MT,MT1,MT2> does not hold. The visible code fetches both
// operands of the wrapped product, handles two degenerate cases (target without rows or columns;
// left operand without columns) and otherwise dispatches to selectAssignKernel(). The bodies of
// the two special cases and the declarations of A and B are not part of this excerpt.
*/
4595 template<
typename MT
4597 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4598 assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4605 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4606 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Degenerate case: the target has no elements.
4608 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Degenerate case: the inner dimension of the product is zero.
4611 else if( left.columns() == 0UL ) {
4626 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
/*!\brief Selection of the kernel for the assignment C = A * B * scalar.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Chooses between selectSmallAssignKernel() and selectBlasAssignKernel(). The condition starts with
// "B is diagonal"; its remaining terms are on a line that is not part of this excerpt.
*/
4641 template<
typename MT3
4645 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4647 if( ( IsDiagonal<MT5>::value ) ||
4649 selectSmallAssignKernel( C, A, B, scalar );
4651 selectBlasAssignKernel( C, A, B, scalar );
/*!\brief Default (scalar, non-vectorized) assignment kernel for two non-diagonal operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Enabled when neither MT4 nor MT5 is diagonal. For every row i the first contributing k index
// initialises the row of C by plain assignment, the remaining k indices accumulate into it, and a
// final loop runs once more over the columns of that row. The body of that final loop is not part
// of this excerpt; presumably it applies \a scalar -- confirm against the full source. The loop
// bounds are narrowed according to the (strictly) upper/lower structure of A and B.
*/
4669 template<
typename MT3
4673 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
4674 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4676 const size_t M( A.rows() );
4677 const size_t N( B.columns() );
4678 const size_t K( A.columns() );
4680 for(
size_t i=0UL; i<M; ++i )
// Range of k for which A(i,k) can be non-zero given the triangular structure of A.
4682 const size_t kbegin( ( IsUpper<MT4>::value )
4683 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4685 const size_t kend( ( IsLower<MT4>::value )
4686 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// Strictly triangular A can yield an empty k range for a row; that row is handled separately.
4690 if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
4691 for(
size_t j=0UL; j<N; ++j ) {
// Column range of row kbegin of B that can be non-zero.
4698 const size_t jbegin( ( IsUpper<MT5>::value )
4699 ?( IsStrictlyUpper<MT5>::value ? kbegin+1UL : kbegin )
4701 const size_t jend( ( IsLower<MT5>::value )
4702 ?( IsStrictlyLower<MT5>::value ? kbegin : kbegin+1UL )
4706 if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
4707 for(
size_t j=0UL; j<jbegin; ++j ) {
4711 else if( IsStrictlyUpper<MT5>::value ) {
// First k index: plain assignment initialises C(i,j) for the active column range.
4714 for(
size_t j=jbegin; j<jend; ++j ) {
4715 C(i,j) = A(i,kbegin) * B(kbegin,j);
4717 if( IsLower<MT4>::value && IsLower<MT5>::value ) {
4718 for(
size_t j=jend; j<N; ++j ) {
4722 else if( IsStrictlyLower<MT5>::value ) {
4723 reset( C(i,N-1UL) );
// Remaining k indices accumulate into the row.
4727 for(
size_t k=kbegin+1UL; k<kend; ++k )
4729 const size_t jbegin( ( IsUpper<MT5>::value )
4730 ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
4732 const size_t jend( ( IsLower<MT5>::value )
4733 ?( IsStrictlyLower<MT5>::value ? k-1UL : k )
4737 for(
size_t j=jbegin; j<jend; ++j ) {
4738 C(i,j) += A(i,k) * B(k,j);
// For lower B, column jend receives its first contribution here, hence '=' instead of '+='.
4740 if( IsLower<MT5>::value ) {
4741 C(i,jend) = A(i,k) * B(k,jend);
// Column range of row i of C that can be non-zero when A and B are triangular in the same sense.
4746 const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4747 ?( IsStrictlyUpper<MT4>::value || IsStrictlyUpper<MT5>::value ? i+1UL : i )
4749 const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4750 ?( IsStrictlyLower<MT4>::value || IsStrictlyLower<MT5>::value ? i : i+1UL )
4754 for(
size_t j=jbegin; j<jend; ++j ) {
/*!\brief Default assignment kernel for a general left operand and a diagonal right operand.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (not diagonal).
// \param B The right-hand side operand of the product (diagonal).
// \param scalar The scaling factor.
//
// With a diagonal B each element reduces to C(i,j) = A(i,j) * B(j,j) * scalar. Columns outside the
// range in which row i of a triangular A can be non-zero are handled by the loops before and after
// the main loop; their bodies are not part of this excerpt.
*/
4776 template<
typename MT3
4780 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
4781 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4785 const size_t M( A.rows() );
4786 const size_t N( B.columns() );
4788 for(
size_t i=0UL; i<M; ++i )
// Column range in which A(i,j) can be non-zero.
4790 const size_t jbegin( ( IsUpper<MT4>::value )
4791 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4793 const size_t jend( ( IsLower<MT4>::value )
4794 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4798 if( IsUpper<MT4>::value ) {
4799 for(
size_t j=0UL; j<jbegin; ++j ) {
4803 for(
size_t j=jbegin; j<jend; ++j ) {
4804 C(i,j) = A(i,j) * B(j,j) * scalar;
4806 if( IsLower<MT4>::value ) {
4807 for(
size_t j=jend; j<N; ++j ) {
/*!\brief Default assignment kernel for a diagonal left operand and a general right operand.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (diagonal).
// \param B The right-hand side operand of the product (not diagonal).
// \param scalar The scaling factor.
//
// With a diagonal A each element reduces to C(i,j) = A(i,i) * B(i,j) * scalar. Columns outside the
// range in which row i of a triangular B can be non-zero are handled by the loops before and after
// the main loop; their bodies are not part of this excerpt.
*/
4829 template<
typename MT3
4833 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
4834 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4838 const size_t M( A.rows() );
4839 const size_t N( B.columns() );
4841 for(
size_t i=0UL; i<M; ++i )
// Column range in which B(i,j) can be non-zero.
4843 const size_t jbegin( ( IsUpper<MT5>::value )
4844 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
4846 const size_t jend( ( IsLower<MT5>::value )
4847 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
4851 if( IsUpper<MT5>::value ) {
4852 for(
size_t j=0UL; j<jbegin; ++j ) {
4856 for(
size_t j=jbegin; j<jend; ++j ) {
4857 C(i,j) = A(i,i) * B(i,j) * scalar;
4859 if( IsLower<MT5>::value ) {
4860 for(
size_t j=jend; j<N; ++j ) {
/*!\brief Default assignment kernel for two diagonal operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (diagonal).
// \param B The right-hand side operand of the product (diagonal).
// \param scalar The scaling factor.
//
// Only the diagonal is written: C(i,i) = A(i,i) * B(i,i) * scalar. Whatever precedes the loop
// (for instance clearing the off-diagonal part of C) is not part of this excerpt.
*/
4882 template<
typename MT3
4886 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
4887 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4893 for(
size_t i=0UL; i<A.rows(); ++i ) {
4894 C(i,i) = A(i,i) * B(i,i) * scalar;
/*!\brief Small-matrix assignment kernel, non-vectorized fallback.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold; forwards unchanged to
// selectDefaultAssignKernel().
*/
4913 template<
typename MT3
4917 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4918 selectSmallAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4920 selectDefaultAssignKernel( C, A, B, scalar );
/*!\brief Small-matrix assignment kernel, vectorized, for row-major targets.
//
// \param C The row-major target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Computes C = A * B * scalar with intrinsic (SIMD) accumulators. The column loops over j that
// enclose the sections below are not part of this excerpt; judging from the offsets used, the
// sections process column strips of 8, 4, 2 and 1 times IT::size elements. Each section
// accumulates into default-constructed IntrinsicType variables, multiplies the sum once by
// \a factor and stores it to C. The k ranges are narrowed according to the (strictly)
// upper/lower structure of A and B.
*/
4939 template<
typename MT3
4943 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4944 selectSmallAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
4946 typedef IntrinsicTrait<ElementType> IT;
4948 const size_t M( A.rows() );
4949 const size_t N( B.columns() );
4950 const size_t K( A.columns() );
// The scalar broadcast into an intrinsic value; applied once per stored tile.
4952 const IntrinsicType factor(
set( scalar ) );
// Section: one row of C, eight column strips (offsets 0 .. 7*IT::size).
4957 for(
size_t i=0UL; i<M; ++i )
4959 const size_t kbegin( ( IsUpper<MT4>::value )
4960 ?( ( IsLower<MT5>::value )
4961 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4962 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4963 :( IsLower<MT5>::value ? j : 0UL ) );
4964 const size_t kend( ( IsLower<MT4>::value )
4965 ?( ( IsUpper<MT5>::value )
4966 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
4967 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4968 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
4970 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4972 for(
size_t k=kbegin; k<kend; ++k ) {
4973 const IntrinsicType a1(
set( A(i,k) ) );
4974 xmm1 = xmm1 + a1 * B.load(k,j );
4975 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
4976 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
4977 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
4978 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
4979 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
4980 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
4981 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
4984 (~C).
store( i, j , xmm1 * factor );
// Section: two rows of C at a time, four column strips (xmm1-4: row i, xmm5-8: row i+1).
4999 for( ; (i+2UL) <= M; i+=2UL )
5001 const size_t kbegin( ( IsUpper<MT4>::value )
5002 ?( ( IsLower<MT5>::value )
5003 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5004 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5005 :( IsLower<MT5>::value ? j : 0UL ) );
5006 const size_t kend( ( IsLower<MT4>::value )
5007 ?( ( IsUpper<MT5>::value )
5008 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
5009 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5010 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
5012 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5014 for(
size_t k=kbegin; k<kend; ++k ) {
5015 const IntrinsicType a1(
set( A(i ,k) ) );
5016 const IntrinsicType a2(
set( A(i+1UL,k) ) );
5017 const IntrinsicType b1( B.load(k,j ) );
5018 const IntrinsicType b2( B.load(k,j+
IT::size ) );
5019 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
5020 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
5021 xmm1 = xmm1 + a1 * b1;
5022 xmm2 = xmm2 + a1 * b2;
5023 xmm3 = xmm3 + a1 * b3;
5024 xmm4 = xmm4 + a1 * b4;
5025 xmm5 = xmm5 + a2 * b1;
5026 xmm6 = xmm6 + a2 * b2;
5027 xmm7 = xmm7 + a2 * b3;
5028 xmm8 = xmm8 + a2 * b4;
5031 (~C).
store( i , j , xmm1 * factor );
5035 (~C).
store( i+1UL, j , xmm5 * factor );
// Section: single remaining row, four column strips.
5043 const size_t kbegin( ( IsUpper<MT4>::value )
5044 ?( ( IsLower<MT5>::value )
5045 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5046 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5047 :( IsLower<MT5>::value ? j : 0UL ) );
5048 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
5050 IntrinsicType xmm1, xmm2, xmm3, xmm4;
5052 for(
size_t k=kbegin; k<kend; ++k ) {
5053 const IntrinsicType a1(
set( A(i,k) ) );
5054 xmm1 = xmm1 + a1 * B.load(k,j );
5055 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
5056 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
5057 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
5060 (~C).
store( i, j , xmm1 * factor );
// Section: two rows of C at a time, two column strips (xmm1-2: row i, xmm3-4: row i+1).
5071 for( ; (i+2UL) <= M; i+=2UL )
5073 const size_t kbegin( ( IsUpper<MT4>::value )
5074 ?( ( IsLower<MT5>::value )
5075 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5076 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5077 :( IsLower<MT5>::value ? j : 0UL ) );
5078 const size_t kend( ( IsLower<MT4>::value )
5079 ?( ( IsUpper<MT5>::value )
5080 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
5081 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5082 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
5084 IntrinsicType xmm1, xmm2, xmm3, xmm4;
5086 for(
size_t k=kbegin; k<kend; ++k ) {
5087 const IntrinsicType a1(
set( A(i ,k) ) );
5088 const IntrinsicType a2(
set( A(i+1UL,k) ) );
5089 const IntrinsicType b1( B.load(k,j ) );
5090 const IntrinsicType b2( B.load(k,j+
IT::size) );
5091 xmm1 = xmm1 + a1 * b1;
5092 xmm2 = xmm2 + a1 * b2;
5093 xmm3 = xmm3 + a2 * b1;
5094 xmm4 = xmm4 + a2 * b2;
5097 (~C).
store( i , j , xmm1 * factor );
5099 (~C).
store( i+1UL, j , xmm3 * factor );
// Section: single remaining row, two column strips.
5105 const size_t kbegin( ( IsUpper<MT4>::value )
5106 ?( ( IsLower<MT5>::value )
5107 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5108 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5109 :( IsLower<MT5>::value ? j : 0UL ) );
5110 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
5112 IntrinsicType xmm1, xmm2;
5114 for(
size_t k=kbegin; k<kend; ++k ) {
5115 const IntrinsicType a1(
set( A(i,k) ) );
5116 xmm1 = xmm1 + a1 * B.load(k,j );
5117 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
5120 (~C).
store( i, j , xmm1 * factor );
// Section: two rows of C at a time, one column strip.
5129 for( ; (i+2UL) <= M; i+=2UL )
5131 const size_t kbegin( ( IsUpper<MT4>::value )
5132 ?( ( IsLower<MT5>::value )
5133 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5134 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5135 :( IsLower<MT5>::value ? j : 0UL ) );
5136 const size_t kend( ( IsLower<MT4>::value )
5137 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
5140 IntrinsicType xmm1, xmm2;
5142 for(
size_t k=kbegin; k<kend; ++k ) {
5143 const IntrinsicType b1( B.load(k,j) );
5144 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
5145 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
5148 (~C).
store( i , j, xmm1 * factor );
5149 (~C).
store( i+1UL, j, xmm2 * factor );
// Section: single remaining row, one column strip; the k range runs up to K.
5154 const size_t kbegin( ( IsUpper<MT4>::value )
5155 ?( ( IsLower<MT5>::value )
5156 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5157 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5158 :( IsLower<MT5>::value ? j : 0UL ) );
5162 for(
size_t k=kbegin; k<K; ++k ) {
5163 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
5166 (~C).
store( i, j, xmm1 * factor );
/*!\brief Small-matrix assignment kernel, vectorized, for column-major targets.
//
// \param C The column-major target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Replaces one operand by a temporary \a tmp and re-dispatches through assign(). The declarations
// of \a tmp are not part of this excerpt; presumably each is a copy of the replaced operand in
// the opposite storage order -- confirm against the full source. Choice of the replaced operand:
// A if only B is resizable, B if only A is resizable, otherwise the operand with fewer elements
// (A on a tie).
*/
5187 template<
typename MT3
5191 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5192 selectSmallAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
5199 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
5201 assign( ~C, tmp * B * scalar );
5203 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
5205 assign( ~C, A * tmp * scalar );
5207 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
5209 assign( ~C, tmp * B * scalar );
5213 assign( ~C, A * tmp * scalar );
/*!\brief Large-matrix assignment kernel, non-vectorized fallback.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold; forwards unchanged to
// selectDefaultAssignKernel().
*/
5232 template<
typename MT3
5236 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5237 selectLargeAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5239 selectDefaultAssignKernel( C, A, B, scalar );
/*!\brief Large-matrix assignment kernel, vectorized and cache-blocked, for row-major targets.
//
// \param C The row-major target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Computes C = A * B * scalar in blocks of iblock x jblock elements of C and kblock elements of
// the inner dimension. For every (jj,ii) block the elements of C are visited once before the kk
// loop (the body of that loop is not part of this excerpt; the accumulation below relies on it
// clearing the block), then every k-block adds its contribution.
//
// Each k-block accumulates its partial products in default-constructed IntrinsicType variables
// (the same pattern the small kernels of this class use) and adds "partial sum * factor" to the
// value already stored in C. The previous form loaded C into the accumulators and stored
// "accumulator * factor", which multiplied the contributions of earlier k-blocks by \a scalar
// again on every later k-block whenever K exceeded kblock.
//
// The column loops over j and the definitions of j1, j2 and j3 are not part of this excerpt.
*/
5258 template<
typename MT3
5262 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5263 selectLargeAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
5265 typedef IntrinsicTrait<ElementType> IT;
5267 const size_t M( A.rows() );
5268 const size_t N( B.columns() );
5269 const size_t K( A.columns() );
// Block sizes for the rows of C, the columns of C and the inner dimension.
5271 const size_t iblock( 64UL );
5272 const size_t jblock( 128UL );
5273 const size_t kblock( 128UL );
// The scalar broadcast into an intrinsic value; applied to each k-block's partial sum.
5275 const IntrinsicType factor(
set( scalar ) );
5277 for(
size_t jj=0UL; jj<N; jj+=jblock )
5279 const size_t jend(
min( jj+jblock, N ) );
5281 for(
size_t ii=0UL; ii<M; ii+=iblock )
5283 const size_t iend(
min( ii+iblock, M ) );
// Pass over the current block of C before any k-block is accumulated.
5285 for(
size_t i=ii; i<iend; ++i ) {
5286 for(
size_t j=jj; j<jend; ++j ) {
5291 for(
size_t kk=0UL; kk<K; kk+=kblock )
5293 const size_t ktmp(
min( kk+kblock, K ) );
// Section: two rows of C, four column strips (xmm1-4: row i, xmm5-8: row i+1).
5305 for( ; (i+2UL) <= iend; i+=2UL )
5307 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5308 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5309 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
5310 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
5312 IntrinsicType xmm1;
5313 IntrinsicType xmm2;
5314 IntrinsicType xmm3;
5315 IntrinsicType xmm4;
5316 IntrinsicType xmm5;
5317 IntrinsicType xmm6;
5318 IntrinsicType xmm7;
5319 IntrinsicType xmm8;
5321 for(
size_t k=kbegin; k<kend; ++k ) {
5322 const IntrinsicType a1(
set( A(i ,k) ) );
5323 const IntrinsicType a2(
set( A(i+1UL,k) ) );
5324 const IntrinsicType b1( B.load(k,j ) );
5325 const IntrinsicType b2( B.load(k,j1) );
5326 const IntrinsicType b3( B.load(k,j2) );
5327 const IntrinsicType b4( B.load(k,j3) );
5328 xmm1 = xmm1 + a1 * b1;
5329 xmm2 = xmm2 + a1 * b2;
5330 xmm3 = xmm3 + a1 * b3;
5331 xmm4 = xmm4 + a1 * b4;
5332 xmm5 = xmm5 + a2 * b1;
5333 xmm6 = xmm6 + a2 * b2;
5334 xmm7 = xmm7 + a2 * b3;
5335 xmm8 = xmm8 + a2 * b4;
5338 (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5339 (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
5340 (~C).store( i , j2, (~C).load(i ,j2) + xmm3 * factor );
5341 (~C).store( i , j3, (~C).load(i ,j3) + xmm4 * factor );
5342 (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
5343 (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm6 * factor );
5344 (~C).store( i+1UL, j2, (~C).load(i+1UL,j2) + xmm7 * factor );
5345 (~C).store( i+1UL, j3, (~C).load(i+1UL,j3) + xmm8 * factor );
// Section: single remaining row, four column strips.
5350 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5351 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5352 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5353 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
5355 IntrinsicType xmm1;
5356 IntrinsicType xmm2;
5357 IntrinsicType xmm3;
5358 IntrinsicType xmm4;
5360 for(
size_t k=kbegin; k<kend; ++k ) {
5361 const IntrinsicType a1(
set( A(i,k) ) );
5362 xmm1 = xmm1 + a1 * B.load(k,j );
5363 xmm2 = xmm2 + a1 * B.load(k,j1);
5364 xmm3 = xmm3 + a1 * B.load(k,j2);
5365 xmm4 = xmm4 + a1 * B.load(k,j3);
5368 (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5369 (~C).store( i, j1, (~C).load(i,j1) + xmm2 * factor );
5370 (~C).store( i, j2, (~C).load(i,j2) + xmm3 * factor );
5371 (~C).store( i, j3, (~C).load(i,j3) + xmm4 * factor );
// Section: four rows of C, two column strips (two accumulators per row).
5381 for( ; (i+4UL) <= iend; i+=4UL )
5383 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5384 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5385 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
5386 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
5388 IntrinsicType xmm1;
5389 IntrinsicType xmm2;
5390 IntrinsicType xmm3;
5391 IntrinsicType xmm4;
5392 IntrinsicType xmm5;
5393 IntrinsicType xmm6;
5394 IntrinsicType xmm7;
5395 IntrinsicType xmm8;
5397 for(
size_t k=kbegin; k<kend; ++k ) {
5398 const IntrinsicType a1(
set( A(i ,k) ) );
5399 const IntrinsicType a2(
set( A(i+1UL,k) ) );
5400 const IntrinsicType a3(
set( A(i+2UL,k) ) );
5401 const IntrinsicType a4(
set( A(i+3UL,k) ) );
5402 const IntrinsicType b1( B.load(k,j ) );
5403 const IntrinsicType b2( B.load(k,j1) );
5404 xmm1 = xmm1 + a1 * b1;
5405 xmm2 = xmm2 + a1 * b2;
5406 xmm3 = xmm3 + a2 * b1;
5407 xmm4 = xmm4 + a2 * b2;
5408 xmm5 = xmm5 + a3 * b1;
5409 xmm6 = xmm6 + a3 * b2;
5410 xmm7 = xmm7 + a4 * b1;
5411 xmm8 = xmm8 + a4 * b2;
5414 (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5415 (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
5416 (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
5417 (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
5418 (~C).store( i+2UL, j , (~C).load(i+2UL,j ) + xmm5 * factor );
5419 (~C).store( i+2UL, j1, (~C).load(i+2UL,j1) + xmm6 * factor );
5420 (~C).store( i+3UL, j , (~C).load(i+3UL,j ) + xmm7 * factor );
5421 (~C).store( i+3UL, j1, (~C).load(i+3UL,j1) + xmm8 * factor );
// Section: two rows of C, two column strips.
5424 for( ; (i+2UL) <= iend; i+=2UL )
5426 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5427 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5428 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
5429 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
5431 IntrinsicType xmm1;
5432 IntrinsicType xmm2;
5433 IntrinsicType xmm3;
5434 IntrinsicType xmm4;
5436 for(
size_t k=kbegin; k<kend; ++k ) {
5437 const IntrinsicType a1(
set( A(i ,k) ) );
5438 const IntrinsicType a2(
set( A(i+1UL,k) ) );
5439 const IntrinsicType b1( B.load(k,j ) );
5440 const IntrinsicType b2( B.load(k,j1) );
5441 xmm1 = xmm1 + a1 * b1;
5442 xmm2 = xmm2 + a1 * b2;
5443 xmm3 = xmm3 + a2 * b1;
5444 xmm4 = xmm4 + a2 * b2;
5447 (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5448 (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
5449 (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
5450 (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
// Section: single remaining row, two column strips.
5455 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5456 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5457 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5458 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
5460 IntrinsicType xmm1;
5461 IntrinsicType xmm2;
5463 for(
size_t k=kbegin; k<kend; ++k ) {
5464 const IntrinsicType a1(
set( A(i,k) ) );
5465 xmm1 = xmm1 + a1 * B.load(k,j );
5466 xmm2 = xmm2 + a1 * B.load(k,j1);
5469 (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5470 (~C).store( i, j1, (~C).load(i,j1) + xmm2 * factor );
// Section: one row of C, one column strip.
5476 for(
size_t i=ii; i<iend; ++i )
5478 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
5479 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
5480 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5481 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
5483 IntrinsicType xmm1;
5485 for(
size_t k=kbegin; k<kend; ++k ) {
5486 const IntrinsicType a1(
set( A(i,k) ) );
5487 xmm1 = xmm1 + a1 * B.load(k,j);
5490 (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
/*!\brief Large-matrix assignment kernel, vectorized, for column-major targets.
//
// \param C The column-major target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// There is no dedicated large kernel for column-major targets; the call is forwarded to
// selectSmallAssignKernel().
*/
5513 template<
typename MT3
5517 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5518 selectLargeAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
5520 selectSmallAssignKernel( ~C, A, B, scalar );
/*!\brief BLAS-stage assignment kernel, fallback without BLAS.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Selected when UseDefaultKernel<MT3,MT4,MT5,ST2> holds; forwards to selectLargeAssignKernel().
*/
5538 template<
typename MT3
5542 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5543 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5545 selectLargeAssignKernel( C, A, B, scalar );
/*!\brief BLAS-based assignment kernel for single precision operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Uses strmm() with A from the left if A is triangular, strmm() with B from the right if B is
// triangular, and sgemm() with factors (scalar, 0.0F) otherwise. The statements preparing C ahead
// of the strmm() calls are not part of this excerpt.
*/
5564 template<
typename MT3
5568 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5569 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5571 if( IsTriangular<MT4>::value ) {
5573 strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5575 else if( IsTriangular<MT5>::value ) {
5577 strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5580 sgemm( C, A, B, scalar, 0.0F );
/*!\brief BLAS-based assignment kernel for double precision operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Uses dtrmm() with A from the left if A is triangular, dtrmm() with B from the right if B is
// triangular, and dgemm() with factors (scalar, 0.0) otherwise. The statements preparing C ahead
// of the dtrmm() calls are not part of this excerpt.
*/
5601 template<
typename MT3
5605 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5606 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5608 if( IsTriangular<MT4>::value ) {
5610 dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5612 else if( IsTriangular<MT5>::value ) {
5614 dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5617 dgemm( C, A, B, scalar, 0.0 );
/*!\brief BLAS-based assignment kernel for single precision complex operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor; it becomes the real part of a complex<float> factor.
//
// Uses ctrmm() with A from the left if A is triangular, ctrmm() with B from the right if B is
// triangular, and cgemm() with factors (scalar+0i, 0+0i) otherwise. The statements preparing C
// ahead of the ctrmm() calls are not part of this excerpt.
*/
5638 template<
typename MT3
5642 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5643 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5645 if( IsTriangular<MT4>::value ) {
5647 ctrmm( C, A, CblasLeft,
5648 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5649 complex<float>( scalar, 0.0F ) );
5651 else if( IsTriangular<MT5>::value ) {
5653 ctrmm( C, B, CblasRight,
5654 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5655 complex<float>( scalar, 0.0F ) );
5658 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
/*!\brief BLAS-based assignment kernel for double precision complex operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor; it becomes the real part of a complex<double> factor.
//
// Uses ztrmm() with A from the left if A is triangular, ztrmm() with B from the right if B is
// triangular, and zgemm() with factors (scalar+0i, 0+0i) otherwise. The statements preparing C
// ahead of the ztrmm() calls are not part of this excerpt.
*/
5679 template<
typename MT3
5683 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5684 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5686 if( IsTriangular<MT4>::value ) {
5688 ztrmm( C, A, CblasLeft,
5689 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5690 complex<double>( scalar, 0.0 ) );
5692 else if( IsTriangular<MT5>::value ) {
5694 ztrmm( C, B, CblasRight,
5695 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5696 complex<double>( scalar, 0.0 ) );
5699 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
/*!\brief Assignment of the scaled matrix product to a sparse matrix.
//
// \param lhs The target sparse matrix.
// \param rhs The scaled product expression to be assigned.
//
// Overload used when CanExploitSymmetry<MT,MT1,MT2> does not hold. The expression is first
// evaluated serially into a dense temporary whose type is OppositeType if SO is true and
// ResultType otherwise. The statement that transfers the temporary into \a lhs is not part of
// this excerpt.
*/
5717 template<
typename MT
5719 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5720 assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
5724 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
5736 const TmpType tmp(
serial( rhs ) );
/*!\brief Symmetry-exploiting assignment of the scaled matrix product to a column-major matrix.
//
// \param lhs The column-major target matrix.
// \param rhs The scaled product expression to be assigned.
//
// Overload used when CanExploitSymmetry<MT,MT1,MT2> holds. The product is rebuilt with transposed
// symmetric operand(s) and assigned again: if only MT1 is symmetric the left operand is
// transposed, otherwise (only MT2 symmetric) the right operand is transposed. The statement for
// the case that both operands are symmetric is not part of this excerpt.
*/
5755 template<
typename MT >
5756 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5757 assign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
5766 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5767 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5769 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
5771 else if( IsSymmetric<MT1>::value )
5772 assign( ~lhs,
trans( left ) * right * rhs.scalar_ );
5774 assign( ~lhs, left *
trans( right ) * rhs.scalar_ );
/*!\brief Addition assignment of the scaled matrix product to a dense matrix.
//
// \param lhs The target dense matrix.
// \param rhs The scaled product expression to be added.
//
// Overload used when CanExploitSymmetry<MT,MT1,MT2> does not hold. A single guard covers the
// degenerate cases (target without rows or columns, or a product with inner dimension zero); its
// body is not part of this excerpt. Otherwise the call is dispatched to selectAddAssignKernel().
// The declarations of A and B are not part of this excerpt.
*/
5790 template<
typename MT
5792 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5793 addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
5800 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5801 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5803 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
5817 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
/*!\brief Selection of the kernel for the addition assignment C += A * B * scalar.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Chooses between selectSmallAddAssignKernel() and selectBlasAddAssignKernel(). The condition
// starts with "B is diagonal"; its remaining terms are on a line that is not part of this excerpt.
*/
5832 template<
typename MT3
5836 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5838 if( ( IsDiagonal<MT5>::value ) ||
5840 selectSmallAddAssignKernel( C, A, B, scalar );
5842 selectBlasAddAssignKernel( C, A, B, scalar );
/*!\brief Default addition assignment kernel for two non-diagonal operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Evaluates A * B * scalar serially into a temporary of type ResultType. The statement that adds
// the temporary to \a C is not part of this excerpt.
*/
5860 template<
typename MT3
5864 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
5865 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5867 const ResultType tmp(
serial( A * B * scalar ) );
/*!\brief Default addition assignment kernel for a general left and a diagonal right operand.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (not diagonal).
// \param B The right-hand side operand of the product (diagonal).
// \param scalar The scaling factor.
//
// With a diagonal B each update reduces to C(i,j) += A(i,j) * B(j,j) * scalar, restricted to the
// column range in which row i of a triangular A can be non-zero. The column loop is unrolled by
// two; the trailing statement handles the element at jpos (its guard is not part of this excerpt).
*/
5886 template<
typename MT3
5890 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
5891 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5895 const size_t M( A.rows() );
5896 const size_t N( B.columns() );
5898 for(
size_t i=0UL; i<M; ++i )
// Column range in which A(i,j) can be non-zero.
5900 const size_t jbegin( ( IsUpper<MT4>::value )
5901 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5903 const size_t jend( ( IsLower<MT4>::value )
5904 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// jpos ends the part of the range that can be processed two columns at a time
// (jnum with its lowest bit cleared).
5908 const size_t jnum( jend - jbegin );
5909 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
5911 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
5912 C(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5913 C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5916 C(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
/*!\brief Default addition assignment kernel for a diagonal left and a general right operand.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (diagonal).
// \param B The right-hand side operand of the product (not diagonal).
// \param scalar The scaling factor.
//
// With a diagonal A each update reduces to C(i,j) += A(i,i) * B(i,j) * scalar, restricted to the
// column range in which row i of a triangular B can be non-zero. The column loop is unrolled by
// two; the trailing statement handles the element at jpos (its guard is not part of this excerpt).
*/
5936 template<
typename MT3
5940 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
5941 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5945 const size_t M( A.rows() );
5946 const size_t N( B.columns() );
5948 for(
size_t i=0UL; i<M; ++i )
// Column range in which B(i,j) can be non-zero.
5950 const size_t jbegin( ( IsUpper<MT5>::value )
5951 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
5953 const size_t jend( ( IsLower<MT5>::value )
5954 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
// jpos ends the part of the range that can be processed two columns at a time
// (jnum with its lowest bit cleared).
5958 const size_t jnum( jend - jbegin );
5959 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
5961 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
5962 C(i,j ) += A(i,i) * B(i,j ) * scalar;
5963 C(i,j+1UL) += A(i,i) * B(i,j+1UL) * scalar;
5966 C(i,jpos) += A(i,i) * B(i,jpos) * scalar;
/*!\brief Default addition assignment kernel for two diagonal operands.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product (diagonal).
// \param B The right-hand side operand of the product (diagonal).
// \param scalar The scaling factor.
//
// Only the diagonal of C is updated: C(i,i) += A(i,i) * B(i,i) * scalar.
*/
5986 template<
typename MT3
5990 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
5991 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
5995 for(
size_t i=0UL; i<A.rows(); ++i ) {
5996 C(i,i) += A(i,i) * B(i,i) * scalar;
/*!\brief Small-matrix addition assignment kernel, non-vectorized fallback.
//
// \param C The target matrix.
// \param A The left-hand side operand of the product.
// \param B The right-hand side operand of the product.
// \param scalar The scaling factor.
//
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold; forwards unchanged to
// selectDefaultAddAssignKernel().
*/
6015 template<
typename MT3
6019 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6020 selectSmallAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6022 selectDefaultAddAssignKernel( C, A, B, scalar );
// Vectorized small-matrix addition-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). Each section below accumulates the products
// set( A(i,k) ) * B.load(k,j+...) over k into IntrinsicType accumulators and
// then stores (~C).load(...) + accumulator * factor back into C, where factor
// is the broadcast scalar. The sections differ in how many rows of A and how
// many SIMD vectors of a row of B they handle at once.
// NOTE(review): the enclosing loops over the column index j, the closing
// braces, and most of the store statements are not visible in this excerpt.
6041 template<
typename MT3
6045 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6046 selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
6048 typedef IntrinsicTrait<ElementType> IT;
6050 const size_t M( A.rows() );
6051 const size_t N( B.columns() );
6052 const size_t K( A.columns() );
// Broadcast of the scalar so it can be multiplied with the accumulators.
6054 const IntrinsicType factor(
set( scalar ) );
// Section: one row of A against eight SIMD vectors of B (columns j up to
// j+IT::size*7).
6059 for(
size_t i=0UL; i<M; ++i )
// The k range is narrowed via the IsUpper/IsLower/IsStrictly* traits of the
// operand types, combined with i, j and K.
6061 const size_t kbegin( ( IsUpper<MT4>::value )
6062 ?( ( IsLower<MT5>::value )
6063 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6064 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6065 :( IsLower<MT5>::value ? j : 0UL ) );
6066 const size_t kend( ( IsLower<MT4>::value )
6067 ?( ( IsUpper<MT5>::value )
6068 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
6069 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
6070 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
// NOTE(review): the accumulators have no explicit initializer; presumably
// IntrinsicType default-constructs to zero - confirm.
6072 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6074 for(
size_t k=kbegin; k<kend; ++k ) {
6075 const IntrinsicType a1(
set( A(i,k) ) );
6076 xmm1 = xmm1 + a1 * B.load(k,j );
6077 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
6078 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
6079 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
6080 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
6081 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
6082 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
6083 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
6086 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// Section: two rows of A (i and i+1) against four SIMD vectors of B.
6101 for( ; (i+2UL) <= M; i+=2UL )
6103 const size_t kbegin( ( IsUpper<MT4>::value )
6104 ?( ( IsLower<MT5>::value )
6105 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6106 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6107 :( IsLower<MT5>::value ? j : 0UL ) );
6108 const size_t kend( ( IsLower<MT4>::value )
6109 ?( ( IsUpper<MT5>::value )
6110 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
6111 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6112 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
// xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1.
6114 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6116 for(
size_t k=kbegin; k<kend; ++k ) {
6117 const IntrinsicType a1(
set( A(i ,k) ) );
6118 const IntrinsicType a2(
set( A(i+1UL,k) ) );
6119 const IntrinsicType b1( B.load(k,j ) );
6120 const IntrinsicType b2( B.load(k,j+
IT::size ) );
6121 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
6122 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
6123 xmm1 = xmm1 + a1 * b1;
6124 xmm2 = xmm2 + a1 * b2;
6125 xmm3 = xmm3 + a1 * b3;
6126 xmm4 = xmm4 + a1 * b4;
6127 xmm5 = xmm5 + a2 * b1;
6128 xmm6 = xmm6 + a2 * b2;
6129 xmm7 = xmm7 + a2 * b3;
6130 xmm8 = xmm8 + a2 * b4;
6133 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6137 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
// Section: a single row of A against four SIMD vectors of B.
// NOTE(review): likely handles a leftover row after the two-row loop; its
// guard is not visible here.
6145 const size_t kbegin( ( IsUpper<MT4>::value )
6146 ?( ( IsLower<MT5>::value )
6147 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6148 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6149 :( IsLower<MT5>::value ? j : 0UL ) );
6150 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
6152 IntrinsicType xmm1, xmm2, xmm3, xmm4;
6154 for(
size_t k=kbegin; k<kend; ++k ) {
6155 const IntrinsicType a1(
set( A(i,k) ) );
6156 xmm1 = xmm1 + a1 * B.load(k,j );
6157 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
6158 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
6159 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
6162 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// Section: two rows of A against two SIMD vectors of B.
6173 for( ; (i+2UL) <= M; i+=2UL )
6175 const size_t kbegin( ( IsUpper<MT4>::value )
6176 ?( ( IsLower<MT5>::value )
6177 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6178 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6179 :( IsLower<MT5>::value ? j : 0UL ) );
6180 const size_t kend( ( IsLower<MT4>::value )
6181 ?( ( IsUpper<MT5>::value )
6182 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
6183 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6184 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
// xmm1/xmm2 accumulate row i, xmm3/xmm4 accumulate row i+1.
6186 IntrinsicType xmm1, xmm2, xmm3, xmm4;
6188 for(
size_t k=kbegin; k<kend; ++k ) {
6189 const IntrinsicType a1(
set( A(i ,k) ) );
6190 const IntrinsicType a2(
set( A(i+1UL,k) ) );
6191 const IntrinsicType b1( B.load(k,j ) );
6192 const IntrinsicType b2( B.load(k,j+
IT::size) );
6193 xmm1 = xmm1 + a1 * b1;
6194 xmm2 = xmm2 + a1 * b2;
6195 xmm3 = xmm3 + a2 * b1;
6196 xmm4 = xmm4 + a2 * b2;
6199 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6201 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
// Section: a single row of A against two SIMD vectors of B.
6207 const size_t kbegin( ( IsUpper<MT4>::value )
6208 ?( ( IsLower<MT5>::value )
6209 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6210 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6211 :( IsLower<MT5>::value ? j : 0UL ) );
6212 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
6214 IntrinsicType xmm1, xmm2;
6216 for(
size_t k=kbegin; k<kend; ++k ) {
6217 const IntrinsicType a1(
set( A(i,k) ) );
6218 xmm1 = xmm1 + a1 * B.load(k,j );
6219 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
6222 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// Section: two rows of A against a single SIMD vector of B.
6231 for( ; (i+2UL) <= M; i+=2UL )
6233 const size_t kbegin( ( IsUpper<MT4>::value )
6234 ?( ( IsLower<MT5>::value )
6235 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6236 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6237 :( IsLower<MT5>::value ? j : 0UL ) );
6238 const size_t kend( ( IsLower<MT4>::value )
6239 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
6242 IntrinsicType xmm1, xmm2;
6244 for(
size_t k=kbegin; k<kend; ++k ) {
6245 const IntrinsicType b1( B.load(k,j) );
6246 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
6247 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
6250 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
6251 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
// Section: a single row of A against a single SIMD vector of B; here the k
// loop always runs up to K.
6256 const size_t kbegin( ( IsUpper<MT4>::value )
6257 ?( ( IsLower<MT5>::value )
6258 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6259 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6260 :( IsLower<MT5>::value ? j : 0UL ) );
6264 for(
size_t k=kbegin; k<K; ++k ) {
6265 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
6268 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// Vectorized small-matrix addition-assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). The visible conditions choose between strategies
// based on whether the operand types are resizable and, otherwise, on which
// operand has fewer elements (rows * columns).
// NOTE(review): the bodies of the branches (and a final else, if any) are not
// visible in this excerpt.
6289 template<
typename MT3
6293 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6294 selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
6301 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
6305 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
6309 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Large-matrix addition-assignment kernel used when the vectorized default
// kernel is not applicable (DisableIf on UseVectorizedDefaultKernel).
// It forwards all arguments to selectDefaultAddAssignKernel().
6334 template<
typename MT3
6338 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6339 selectLargeAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6341 selectDefaultAddAssignKernel( C, A, B, scalar );
// Vectorized large-matrix addition-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). The iteration space is tiled with fixed block
// sizes (iblock x jblock x kblock); within a tile the sections below
// accumulate set( A(i,k) ) * B.load(k,...) over k into IntrinsicType
// accumulators and then store (~C).load(...) + accumulator * factor into C.
// NOTE(review): the loops over j, the definitions of i, j1, j2 and j3, and the
// closing braces are not visible in this excerpt; j1/j2/j3 are presumably
// j + IT::size, j + 2*IT::size and j + 3*IT::size - confirm.
6360 template<
typename MT3
6364 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6365 selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
6367 typedef IntrinsicTrait<ElementType> IT;
6369 const size_t M( A.rows() );
6370 const size_t N( B.columns() );
6371 const size_t K( A.columns() );
// Tile extents in the i (rows of A), j (columns of B) and k (inner) dimension.
6373 const size_t iblock( 64UL );
6374 const size_t jblock( 128UL );
6375 const size_t kblock( 128UL );
// Broadcast of the scalar so it can be multiplied with the accumulators.
6377 const IntrinsicType factor(
set( scalar ) );
// Tile loops: jj over columns, ii over rows, kk over the inner dimension.
// jend, iend and ktmp clamp the current tile to the matrix bounds.
6379 for(
size_t jj=0UL; jj<N; jj+=jblock )
6381 const size_t jend(
min( jj+jblock, N ) );
6383 for(
size_t ii=0UL; ii<M; ii+=iblock )
6385 const size_t iend(
min( ii+iblock, M ) );
6387 for(
size_t kk=0UL; kk<K; kk+=kblock )
6389 const size_t ktmp(
min( kk+kblock, K ) );
// Section: two rows of A against four SIMD vectors of B (j, j1, j2, j3).
6401 for( ; (i+2UL) <= iend; i+=2UL )
// The k range of the tile [kk,ktmp) is narrowed further via the
// IsUpper/IsLower traits of the operand types.
6403 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6404 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6405 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
6406 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
// xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1.
// NOTE(review): no explicit initializer; presumably IntrinsicType
// default-constructs to zero - confirm.
6408 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6410 for(
size_t k=kbegin; k<kend; ++k ) {
6411 const IntrinsicType a1(
set( A(i ,k) ) );
6412 const IntrinsicType a2(
set( A(i+1UL,k) ) );
6413 const IntrinsicType b1( B.load(k,j ) );
6414 const IntrinsicType b2( B.load(k,j1) );
6415 const IntrinsicType b3( B.load(k,j2) );
6416 const IntrinsicType b4( B.load(k,j3) );
6417 xmm1 = xmm1 + a1 * b1;
6418 xmm2 = xmm2 + a1 * b2;
6419 xmm3 = xmm3 + a1 * b3;
6420 xmm4 = xmm4 + a1 * b4;
6421 xmm5 = xmm5 + a2 * b1;
6422 xmm6 = xmm6 + a2 * b2;
6423 xmm7 = xmm7 + a2 * b3;
6424 xmm8 = xmm8 + a2 * b4;
6427 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6428 (~C).
store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6429 (~C).
store( i , j2, (~C).load(i ,j2) + xmm3 * factor );
6430 (~C).
store( i , j3, (~C).load(i ,j3) + xmm4 * factor );
6431 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
6432 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm6 * factor );
6433 (~C).
store( i+1UL, j2, (~C).load(i+1UL,j2) + xmm7 * factor );
6434 (~C).
store( i+1UL, j3, (~C).load(i+1UL,j3) + xmm8 * factor );
// Section: a single row of A against four SIMD vectors of B.
// NOTE(review): likely handles a leftover row of the tile; its guard is not
// visible here.
6439 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6440 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6441 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6442 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
6444 IntrinsicType xmm1, xmm2, xmm3, xmm4;
6446 for(
size_t k=kbegin; k<kend; ++k ) {
6447 const IntrinsicType a1(
set( A(i,k) ) );
6448 xmm1 = xmm1 + a1 * B.load(k,j );
6449 xmm2 = xmm2 + a1 * B.load(k,j1);
6450 xmm3 = xmm3 + a1 * B.load(k,j2);
6451 xmm4 = xmm4 + a1 * B.load(k,j3);
6454 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
6455 (~C).
store( i, j1, (~C).load(i,j1) + xmm2 * factor );
6456 (~C).
store( i, j2, (~C).load(i,j2) + xmm3 * factor );
6457 (~C).
store( i, j3, (~C).load(i,j3) + xmm4 * factor );
// Section: four rows of A against two SIMD vectors of B (j, j1).
6467 for( ; (i+4UL) <= iend; i+=4UL )
6469 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6470 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6471 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
6472 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
// Two accumulators per row: (xmm1,xmm2) for row i up to (xmm7,xmm8) for i+3.
6474 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6476 for(
size_t k=kbegin; k<kend; ++k ) {
6477 const IntrinsicType a1(
set( A(i ,k) ) );
6478 const IntrinsicType a2(
set( A(i+1UL,k) ) );
6479 const IntrinsicType a3(
set( A(i+2UL,k) ) );
6480 const IntrinsicType a4(
set( A(i+3UL,k) ) );
6481 const IntrinsicType b1( B.load(k,j ) );
6482 const IntrinsicType b2( B.load(k,j1) );
6483 xmm1 = xmm1 + a1 * b1;
6484 xmm2 = xmm2 + a1 * b2;
6485 xmm3 = xmm3 + a2 * b1;
6486 xmm4 = xmm4 + a2 * b2;
6487 xmm5 = xmm5 + a3 * b1;
6488 xmm6 = xmm6 + a3 * b2;
6489 xmm7 = xmm7 + a4 * b1;
6490 xmm8 = xmm8 + a4 * b2;
6493 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6494 (~C).
store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6495 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
6496 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
6497 (~C).
store( i+2UL, j , (~C).load(i+2UL,j ) + xmm5 * factor );
6498 (~C).
store( i+2UL, j1, (~C).load(i+2UL,j1) + xmm6 * factor );
6499 (~C).
store( i+3UL, j , (~C).load(i+3UL,j ) + xmm7 * factor );
6500 (~C).
store( i+3UL, j1, (~C).load(i+3UL,j1) + xmm8 * factor );
// Section: two rows of A against two SIMD vectors of B.
6503 for( ; (i+2UL) <= iend; i+=2UL )
6505 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6506 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6507 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
6508 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
6510 IntrinsicType xmm1, xmm2, xmm3, xmm4;
6512 for(
size_t k=kbegin; k<kend; ++k ) {
6513 const IntrinsicType a1(
set( A(i ,k) ) );
6514 const IntrinsicType a2(
set( A(i+1UL,k) ) );
6515 const IntrinsicType b1( B.load(k,j ) );
6516 const IntrinsicType b2( B.load(k,j1) );
6517 xmm1 = xmm1 + a1 * b1;
6518 xmm2 = xmm2 + a1 * b2;
6519 xmm3 = xmm3 + a2 * b1;
6520 xmm4 = xmm4 + a2 * b2;
6523 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6524 (~C).
store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6525 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
6526 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
// Section: a single row of A against two SIMD vectors of B.
6531 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6532 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6533 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6534 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
6536 IntrinsicType xmm1, xmm2;
6538 for(
size_t k=kbegin; k<kend; ++k ) {
6539 const IntrinsicType a1(
set( A(i,k) ) );
6540 xmm1 = xmm1 + a1 * B.load(k,j );
6541 xmm2 = xmm2 + a1 * B.load(k,j1);
6544 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
6545 (~C).
store( i, j1, (~C).load(i,j1) + xmm2 * factor );
// Section: every row of the tile against a single SIMD vector of B.
6551 for(
size_t i=ii; i<iend; ++i )
6553 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
6554 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
6555 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6556 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
6560 for(
size_t k=kbegin; k<kend; ++k ) {
6561 const IntrinsicType a1(
set( A(i,k) ) );
6562 xmm1 = xmm1 + a1 * B.load(k,j);
6565 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// Vectorized large-matrix addition-assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). No dedicated large kernel is implemented here: the
// call is forwarded to selectSmallAddAssignKernel() with the unwrapped target.
6588 template<
typename MT3
6592 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6593 selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
6595 selectSmallAddAssignKernel( ~C, A, B, scalar );
// BLAS addition-assignment kernel for the case that UseDefaultKernel is
// satisfied: the call is forwarded to selectLargeAddAssignKernel().
6613 template<
typename MT3
6617 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6618 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6620 selectLargeAddAssignKernel( C, A, B, scalar );
// Single-precision BLAS addition-assignment kernel (enabled via
// UseSinglePrecisionKernel). A triangular operand is handled with strmm() on a
// temporary 'tmp'; otherwise sgemm() is called with 'scalar' and 1.0F as its
// scaling arguments.
// NOTE(review): the declaration of 'tmp', the statements that add it to C and
// the final else are not visible in this excerpt.
6639 template<
typename MT3
6643 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6644 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Left operand triangular: A is applied from the left (CblasLeft).
6646 if( IsTriangular<MT4>::value ) {
6648 strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
// Right operand triangular: B is applied from the right (CblasRight).
6651 else if( IsTriangular<MT5>::value ) {
6653 strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
// General case.
6657 sgemm( C, A, B, scalar, 1.0F );
// Double-precision BLAS addition-assignment kernel (enabled via
// UseDoublePrecisionKernel). A triangular operand is handled with dtrmm() on a
// temporary 'tmp'; otherwise dgemm() is called with 'scalar' and 1.0 as its
// scaling arguments.
// NOTE(review): the declaration of 'tmp', the statements that add it to C and
// the final else are not visible in this excerpt.
6678 template<
typename MT3
6682 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6683 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Left operand triangular: A is applied from the left (CblasLeft).
6685 if( IsTriangular<MT4>::value ) {
6687 dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
// Right operand triangular: B is applied from the right (CblasRight).
6690 else if( IsTriangular<MT5>::value ) {
6692 dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
// General case.
6696 dgemm( C, A, B, scalar, 1.0 );
// Single-precision complex BLAS addition-assignment kernel (enabled via
// UseSinglePrecisionComplexKernel). The scalar is converted to a
// complex<float> with zero imaginary part before it is handed to ctrmm() or
// cgemm().
// NOTE(review): the declaration of 'tmp', the statements that add it to C and
// the final else are not visible in this excerpt.
6717 template<
typename MT3
6721 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6722 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Left operand triangular: A is applied from the left (CblasLeft).
6724 if( IsTriangular<MT4>::value ) {
6726 ctrmm( tmp, A, CblasLeft,
6727 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6728 complex<float>( scalar, 0.0F ) );
// Right operand triangular: B is applied from the right (CblasRight).
6731 else if( IsTriangular<MT5>::value ) {
6733 ctrmm( tmp, B, CblasRight,
6734 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6735 complex<float>( scalar, 0.0F ) );
// General case: the last two arguments are the complex scalar and complex one.
6739 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS addition-assignment kernel (enabled via
// UseDoublePrecisionComplexKernel). The scalar is converted to a
// complex<double> with zero imaginary part before it is handed to ztrmm() or
// zgemm().
// NOTE(review): the declaration of 'tmp', the statements that add it to C and
// the final else are not visible in this excerpt.
6760 template<
typename MT3
6764 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6765 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Left operand triangular: A is applied from the left (CblasLeft).
6767 if( IsTriangular<MT4>::value ) {
6769 ztrmm( tmp, A, CblasLeft,
6770 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6771 complex<double>( scalar, 0.0 ) );
// Right operand triangular: B is applied from the right (CblasRight).
6774 else if( IsTriangular<MT5>::value ) {
6776 ztrmm( tmp, B, CblasRight,
6777 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6778 complex<double>( scalar, 0.0 ) );
// General case: the last two arguments are the complex scalar and complex one.
6782 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Addition assignment of a scaled dense matrix product to a column-major
// matrix (Matrix<MT,true>), enabled only when CanExploitSymmetry<MT,MT1,MT2>
// holds. The left and right operands of the wrapped multiplication are
// fetched and the branches distinguish which of the operand types is
// symmetric.
// NOTE(review): the branch bodies are not visible in this excerpt.
6802 template<
typename MT >
6803 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6804 addAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
6813 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6814 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6816 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
6818 else if( IsSymmetric<MT1>::value )
// Subtraction assignment of a scaled dense matrix product to a dense matrix,
// used when CanExploitSymmetry<MT,MT1,MT2> does not hold (DisableIf). The
// operands of the wrapped multiplication are fetched and the work is delegated
// to selectSubAssignKernel() together with the scalar of this expression.
// NOTE(review): the definitions of A and B are not visible in this excerpt.
6841 template<
typename MT
6843 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6844 subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
6851 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6852 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Degenerate sizes: the target has no rows or columns, or the inner dimension
// is zero. NOTE(review): the branch body is not visible; presumably an early
// return - confirm.
6854 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
6868 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Dispatches the subtraction assignment either to the small-matrix kernel or
// to the BLAS kernel.
// NOTE(review): only the first part of the condition (diagonal right operand)
// is visible in this excerpt; the remaining terms and the else keyword are
// missing from this view.
6883 template<
typename MT3
6887 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6889 if( ( IsDiagonal<MT5>::value ) ||
6891 selectSmallSubAssignKernel( C, A, B, scalar );
6893 selectBlasSubAssignKernel( C, A, B, scalar );
// Default subtraction-assignment kernel for two non-diagonal operands (see the
// EnableIf condition). The scaled product A * B * scalar is evaluated through
// serial() into a temporary of type ResultType.
// NOTE(review): the statement that subtracts 'tmp' from C is not visible in
// this excerpt.
6911 template<
typename MT3
6915 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
6916 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6918 const ResultType tmp(
serial( A * B * scalar ) );
// Default subtraction-assignment kernel for a non-diagonal left operand A and
// a diagonal right operand B (see the EnableIf condition). For every row i it
// computes C(i,j) -= A(i,j) * B(j,j) * scalar; only the diagonal elements of B
// are read.
6937 template<
typename MT3
6941 static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
6942 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6946 const size_t M( A.rows() );
6947 const size_t N( B.columns() );
6949 for(
size_t i=0UL; i<M; ++i )
// First column visited in row i: for an upper A it starts at the diagonal,
// or one past it when A is strictly upper.
6951 const size_t jbegin( ( IsUpper<MT4>::value )
6952 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
// One-past-last column visited in row i: for a lower A it stops at the
// diagonal (exclusive for a strictly lower A, inclusive otherwise).
6954 const size_t jend( ( IsLower<MT4>::value )
6955 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
// jpos rounds the number of columns down to an even count so that the loop
// below can process two columns per iteration.
6959 const size_t jnum( jend - jbegin );
6960 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
6962 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
6963 C(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6964 C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
// Update of the column at jpos, i.e. the one left over after the paired loop.
// NOTE(review): the condition guarding this statement is not visible in this
// excerpt; presumably it only runs when jpos < jend.
6967 C(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
// Default subtraction-assignment kernel for a diagonal left operand A and a
// non-diagonal right operand B (see the EnableIf condition). For every row i
// it computes C(i,j) -= A(i,i) * B(i,j) * scalar; only the diagonal element
// A(i,i) of A is read.
6987 template<
typename MT3
6991 static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
6992 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
6996 const size_t M( A.rows() );
6997 const size_t N( B.columns() );
6999 for(
size_t i=0UL; i<M; ++i )
// First column visited in row i: for an upper B it starts at the diagonal,
// or one past it when B is strictly upper.
7001 const size_t jbegin( ( IsUpper<MT5>::value )
7002 ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
// One-past-last column visited in row i: for a lower B it stops at the
// diagonal (exclusive for a strictly lower B, inclusive otherwise).
7004 const size_t jend( ( IsLower<MT5>::value )
7005 ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
// jpos rounds the number of columns down to an even count so that the loop
// below can process two columns per iteration.
7009 const size_t jnum( jend - jbegin );
7010 const size_t jpos( jbegin + ( jnum &
size_t(-2) ) );
7012 for(
size_t j=jbegin; j<jpos; j+=2UL ) {
7013 C(i,j ) -= A(i,i) * B(i,j ) * scalar;
7014 C(i,j+1UL) -= A(i,i) * B(i,j+1UL) * scalar;
// Update of the column at jpos, i.e. the one left over after the paired loop.
// NOTE(review): the condition guarding this statement is not visible in this
// excerpt; presumably it only runs when jpos < jend.
7017 C(i,jpos) -= A(i,i) * B(i,jpos) * scalar;
// Default subtraction-assignment kernel for the case that both operands are
// diagonal (see the EnableIf condition). Only diagonal elements are touched:
// C(i,i) -= A(i,i) * B(i,i) * scalar for every row index i of A.
7037 template<
typename MT3
7041 static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
7042 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7046 for(
size_t i=0UL; i<A.rows(); ++i ) {
7047 C(i,i) -= A(i,i) * B(i,i) * scalar;
// Small-matrix subtraction-assignment kernel used when the vectorized default
// kernel is not applicable (DisableIf on UseVectorizedDefaultKernel).
// It simply forwards all arguments to selectDefaultSubAssignKernel().
7066 template<
typename MT3
7070 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7071 selectSmallSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7073 selectDefaultSubAssignKernel( C, A, B, scalar );
// Vectorized small-matrix subtraction-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). Each section below accumulates the products
// set( A(i,k) ) * B.load(k,j+...) over k into IntrinsicType accumulators and
// then stores (~C).load(...) - accumulator * factor back into C, where factor
// is the broadcast scalar. The sections differ in how many rows of A and how
// many SIMD vectors of a row of B they handle at once.
// NOTE(review): the enclosing loops over the column index j, the closing
// braces, and most of the store statements are not visible in this excerpt.
7092 template<
typename MT3
7096 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7097 selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
7099 typedef IntrinsicTrait<ElementType> IT;
7101 const size_t M( A.rows() );
7102 const size_t N( B.columns() );
7103 const size_t K( A.columns() );
// Broadcast of the scalar so it can be multiplied with the accumulators.
7105 const IntrinsicType factor(
set( scalar ) );
// Section: one row of A against eight SIMD vectors of B (columns j up to
// j+IT::size*7).
7110 for(
size_t i=0UL; i<M; ++i )
// The k range is narrowed via the IsUpper/IsLower/IsStrictly* traits of the
// operand types, combined with i, j and K.
7112 const size_t kbegin( ( IsUpper<MT4>::value )
7113 ?( ( IsLower<MT5>::value )
7114 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7115 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7116 :( IsLower<MT5>::value ? j : 0UL ) );
7117 const size_t kend( ( IsLower<MT4>::value )
7118 ?( ( IsUpper<MT5>::value )
7119 ?(
min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+
IT::size*8UL, K ) )
7120 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
7121 :( IsUpper<MT5>::value ?
min( j+
IT::size*8UL, K ) : K ) );
// NOTE(review): the accumulators have no explicit initializer; presumably
// IntrinsicType default-constructs to zero - confirm.
7123 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7125 for(
size_t k=kbegin; k<kend; ++k ) {
7126 const IntrinsicType a1(
set( A(i,k) ) );
7127 xmm1 = xmm1 + a1 * B.load(k,j );
7128 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
7129 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
7130 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
7131 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
7132 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
7133 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
7134 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
7137 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// Section: two rows of A (i and i+1) against four SIMD vectors of B.
7152 for( ; (i+2UL) <= M; i+=2UL )
7154 const size_t kbegin( ( IsUpper<MT4>::value )
7155 ?( ( IsLower<MT5>::value )
7156 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7157 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7158 :( IsLower<MT5>::value ? j : 0UL ) );
7159 const size_t kend( ( IsLower<MT4>::value )
7160 ?( ( IsUpper<MT5>::value )
7161 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*4UL, K ) )
7162 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
7163 :( IsUpper<MT5>::value ?
min( j+
IT::size*4UL, K ) : K ) );
// xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1.
7165 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7167 for(
size_t k=kbegin; k<kend; ++k ) {
7168 const IntrinsicType a1(
set( A(i ,k) ) );
7169 const IntrinsicType a2(
set( A(i+1UL,k) ) );
7170 const IntrinsicType b1( B.load(k,j ) );
7171 const IntrinsicType b2( B.load(k,j+
IT::size ) );
7172 const IntrinsicType b3( B.load(k,j+
IT::size*2UL) );
7173 const IntrinsicType b4( B.load(k,j+
IT::size*3UL) );
7174 xmm1 = xmm1 + a1 * b1;
7175 xmm2 = xmm2 + a1 * b2;
7176 xmm3 = xmm3 + a1 * b3;
7177 xmm4 = xmm4 + a1 * b4;
7178 xmm5 = xmm5 + a2 * b1;
7179 xmm6 = xmm6 + a2 * b2;
7180 xmm7 = xmm7 + a2 * b3;
7181 xmm8 = xmm8 + a2 * b4;
7184 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7188 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
// Section: a single row of A against four SIMD vectors of B.
// NOTE(review): likely handles a leftover row after the two-row loop; its
// guard is not visible here.
7196 const size_t kbegin( ( IsUpper<MT4>::value )
7197 ?( ( IsLower<MT5>::value )
7198 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7199 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7200 :( IsLower<MT5>::value ? j : 0UL ) );
7201 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, K ) ):( K ) );
7203 IntrinsicType xmm1, xmm2, xmm3, xmm4;
7205 for(
size_t k=kbegin; k<kend; ++k ) {
7206 const IntrinsicType a1(
set( A(i,k) ) );
7207 xmm1 = xmm1 + a1 * B.load(k,j );
7208 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
7209 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
7210 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
7213 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// Section: two rows of A against two SIMD vectors of B.
7224 for( ; (i+2UL) <= M; i+=2UL )
7226 const size_t kbegin( ( IsUpper<MT4>::value )
7227 ?( ( IsLower<MT5>::value )
7228 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7229 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7230 :( IsLower<MT5>::value ? j : 0UL ) );
7231 const size_t kend( ( IsLower<MT4>::value )
7232 ?( ( IsUpper<MT5>::value )
7233 ?(
min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+
IT::size*2UL, K ) )
7234 :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
7235 :( IsUpper<MT5>::value ?
min( j+
IT::size*2UL, K ) : K ) );
// xmm1/xmm2 accumulate row i, xmm3/xmm4 accumulate row i+1.
7237 IntrinsicType xmm1, xmm2, xmm3, xmm4;
7239 for(
size_t k=kbegin; k<kend; ++k ) {
7240 const IntrinsicType a1(
set( A(i ,k) ) );
7241 const IntrinsicType a2(
set( A(i+1UL,k) ) );
7242 const IntrinsicType b1( B.load(k,j ) );
7243 const IntrinsicType b2( B.load(k,j+
IT::size) );
7244 xmm1 = xmm1 + a1 * b1;
7245 xmm2 = xmm2 + a1 * b2;
7246 xmm3 = xmm3 + a2 * b1;
7247 xmm4 = xmm4 + a2 * b2;
7250 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7252 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
// Section: a single row of A against two SIMD vectors of B.
7258 const size_t kbegin( ( IsUpper<MT4>::value )
7259 ?( ( IsLower<MT5>::value )
7260 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7261 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7262 :( IsLower<MT5>::value ? j : 0UL ) );
7263 const size_t kend( ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, K ) ):( K ) );
7265 IntrinsicType xmm1, xmm2;
7267 for(
size_t k=kbegin; k<kend; ++k ) {
7268 const IntrinsicType a1(
set( A(i,k) ) );
7269 xmm1 = xmm1 + a1 * B.load(k,j );
7270 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
7273 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// Section: two rows of A against a single SIMD vector of B.
7282 for( ; (i+2UL) <= M; i+=2UL )
7284 const size_t kbegin( ( IsUpper<MT4>::value )
7285 ?( ( IsLower<MT5>::value )
7286 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7287 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7288 :( IsLower<MT5>::value ? j : 0UL ) );
7289 const size_t kend( ( IsLower<MT4>::value )
7290 ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
7293 IntrinsicType xmm1, xmm2;
7295 for(
size_t k=kbegin; k<kend; ++k ) {
7296 const IntrinsicType b1( B.load(k,j) );
7297 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
7298 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
7301 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
7302 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
// Section: a single row of A against a single SIMD vector of B; here the k
// loop always runs up to K.
7307 const size_t kbegin( ( IsUpper<MT4>::value )
7308 ?( ( IsLower<MT5>::value )
7309 ?(
max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7310 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7311 :( IsLower<MT5>::value ? j : 0UL ) );
7315 for(
size_t k=kbegin; k<K; ++k ) {
7316 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
7319 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// Vectorized small-matrix subtraction-assignment kernel for a column-major
// target (DenseMatrix<MT3,true>). The visible conditions choose between
// strategies based on whether the operand types are resizable and, otherwise,
// on which operand has fewer elements (rows * columns).
// NOTE(review): the bodies of the branches (and a final else, if any) are not
// visible in this excerpt.
7339 template<
typename MT3
7343 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7344 selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
7351 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
7355 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
7359 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Large-matrix subtraction-assignment kernel used when the vectorized default
// kernel is not applicable (DisableIf on UseVectorizedDefaultKernel).
// It forwards all arguments to selectDefaultSubAssignKernel().
7384 template<
typename MT3
7388 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7389 selectLargeSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7391 selectDefaultSubAssignKernel( C, A, B, scalar );
// Vectorized large-matrix subtraction-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). The iteration space is tiled with fixed block
// sizes (iblock x jblock x kblock); within a tile the sections below
// accumulate set( A(i,k) ) * B.load(k,...) over k into IntrinsicType
// accumulators and then store (~C).load(...) - accumulator * factor into C.
// NOTE(review): the loops over j, the definitions of i, j1, j2 and j3, and the
// closing braces are not visible in this excerpt; j1/j2/j3 are presumably
// j + IT::size, j + 2*IT::size and j + 3*IT::size - confirm.
7410 template<
typename MT3
7414 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7415 selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
7417 typedef IntrinsicTrait<ElementType> IT;
7419 const size_t M( A.rows() );
7420 const size_t N( B.columns() );
7421 const size_t K( A.columns() );
// Tile extents in the i (rows of A), j (columns of B) and k (inner) dimension.
7423 const size_t iblock( 64UL );
7424 const size_t jblock( 128UL );
7425 const size_t kblock( 128UL );
// Broadcast of the scalar so it can be multiplied with the accumulators.
7427 const IntrinsicType factor(
set( scalar ) );
// Tile loops: jj over columns, ii over rows, kk over the inner dimension.
// jend, iend and ktmp clamp the current tile to the matrix bounds.
7429 for(
size_t jj=0UL; jj<N; jj+=jblock )
7431 const size_t jend(
min( jj+jblock, N ) );
7433 for(
size_t ii=0UL; ii<M; ii+=iblock )
7435 const size_t iend(
min( ii+iblock, M ) );
7437 for(
size_t kk=0UL; kk<K; kk+=kblock )
7439 const size_t ktmp(
min( kk+kblock, K ) );
// Section: two rows of A against four SIMD vectors of B (j, j1, j2, j3).
7451 for( ; (i+2UL) <= iend; i+=2UL )
// The k range of the tile [kk,ktmp) is narrowed further via the
// IsUpper/IsLower traits of the operand types.
7453 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7454 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7455 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
7456 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
// xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1.
// NOTE(review): no explicit initializer; presumably IntrinsicType
// default-constructs to zero - confirm.
7458 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7460 for(
size_t k=kbegin; k<kend; ++k ) {
7461 const IntrinsicType a1(
set( A(i ,k) ) );
7462 const IntrinsicType a2(
set( A(i+1UL,k) ) );
7463 const IntrinsicType b1( B.load(k,j ) );
7464 const IntrinsicType b2( B.load(k,j1) );
7465 const IntrinsicType b3( B.load(k,j2) );
7466 const IntrinsicType b4( B.load(k,j3) );
7467 xmm1 = xmm1 + a1 * b1;
7468 xmm2 = xmm2 + a1 * b2;
7469 xmm3 = xmm3 + a1 * b3;
7470 xmm4 = xmm4 + a1 * b4;
7471 xmm5 = xmm5 + a2 * b1;
7472 xmm6 = xmm6 + a2 * b2;
7473 xmm7 = xmm7 + a2 * b3;
7474 xmm8 = xmm8 + a2 * b4;
7477 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7478 (~C).
store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7479 (~C).
store( i , j2, (~C).load(i ,j2) - xmm3 * factor );
7480 (~C).
store( i , j3, (~C).load(i ,j3) - xmm4 * factor );
7481 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
7482 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm6 * factor );
7483 (~C).
store( i+1UL, j2, (~C).load(i+1UL,j2) - xmm7 * factor );
7484 (~C).
store( i+1UL, j3, (~C).load(i+1UL,j3) - xmm8 * factor );
// Section: a single row of A against four SIMD vectors of B.
// NOTE(review): likely handles a leftover row of the tile; its guard is not
// visible here.
7489 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7490 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7491 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7492 ( IsUpper<MT5>::value )?(
min( j+
IT::size*4UL, ktmp ) ):( ktmp ) ) );
7494 IntrinsicType xmm1, xmm2, xmm3, xmm4;
7496 for(
size_t k=kbegin; k<kend; ++k ) {
7497 const IntrinsicType a1(
set( A(i,k) ) );
7498 xmm1 = xmm1 + a1 * B.load(k,j );
7499 xmm2 = xmm2 + a1 * B.load(k,j1);
7500 xmm3 = xmm3 + a1 * B.load(k,j2);
7501 xmm4 = xmm4 + a1 * B.load(k,j3);
7504 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
7505 (~C).
store( i, j1, (~C).load(i,j1) - xmm2 * factor );
7506 (~C).
store( i, j2, (~C).load(i,j2) - xmm3 * factor );
7507 (~C).
store( i, j3, (~C).load(i,j3) - xmm4 * factor );
// Section: four rows of A against two SIMD vectors of B (j, j1).
7517 for( ; (i+4UL) <= iend; i+=4UL )
7519 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7520 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7521 const size_t kend (
min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
7522 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
// Two accumulators per row: (xmm1,xmm2) for row i up to (xmm7,xmm8) for i+3.
7524 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7526 for(
size_t k=kbegin; k<kend; ++k ) {
7527 const IntrinsicType a1(
set( A(i ,k) ) );
7528 const IntrinsicType a2(
set( A(i+1UL,k) ) );
7529 const IntrinsicType a3(
set( A(i+2UL,k) ) );
7530 const IntrinsicType a4(
set( A(i+3UL,k) ) );
7531 const IntrinsicType b1( B.load(k,j ) );
7532 const IntrinsicType b2( B.load(k,j1) );
7533 xmm1 = xmm1 + a1 * b1;
7534 xmm2 = xmm2 + a1 * b2;
7535 xmm3 = xmm3 + a2 * b1;
7536 xmm4 = xmm4 + a2 * b2;
7537 xmm5 = xmm5 + a3 * b1;
7538 xmm6 = xmm6 + a3 * b2;
7539 xmm7 = xmm7 + a4 * b1;
7540 xmm8 = xmm8 + a4 * b2;
7543 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7544 (~C).
store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7545 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
7546 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm4 * factor );
7547 (~C).
store( i+2UL, j , (~C).load(i+2UL,j ) - xmm5 * factor );
7548 (~C).
store( i+2UL, j1, (~C).load(i+2UL,j1) - xmm6 * factor );
7549 (~C).
store( i+3UL, j , (~C).load(i+3UL,j ) - xmm7 * factor );
7550 (~C).
store( i+3UL, j1, (~C).load(i+3UL,j1) - xmm8 * factor );
// Section: two rows of A against two SIMD vectors of B.
7553 for( ; (i+2UL) <= iend; i+=2UL )
7555 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7556 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7557 const size_t kend (
min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
7558 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
7560 IntrinsicType xmm1, xmm2, xmm3, xmm4;
7562 for(
size_t k=kbegin; k<kend; ++k ) {
7563 const IntrinsicType a1(
set( A(i ,k) ) );
7564 const IntrinsicType a2(
set( A(i+1UL,k) ) );
7565 const IntrinsicType b1( B.load(k,j ) );
7566 const IntrinsicType b2( B.load(k,j1) );
7567 xmm1 = xmm1 + a1 * b1;
7568 xmm2 = xmm2 + a1 * b2;
7569 xmm3 = xmm3 + a2 * b1;
7570 xmm4 = xmm4 + a2 * b2;
7573 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7574 (~C).
store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7575 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
7576 (~C).
store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm4 * factor );
// Section: a single row of A against two SIMD vectors of B.
7581 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7582 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7583 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7584 ( IsUpper<MT5>::value )?(
min( j+
IT::size*2UL, ktmp ) ):( ktmp ) ) );
7586 IntrinsicType xmm1, xmm2;
7588 for(
size_t k=kbegin; k<kend; ++k ) {
7589 const IntrinsicType a1(
set( A(i,k) ) );
7590 xmm1 = xmm1 + a1 * B.load(k,j );
7591 xmm2 = xmm2 + a1 * B.load(k,j1);
7594 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
7595 (~C).
store( i, j1, (~C).load(i,j1) - xmm2 * factor );
// Section: every row of the tile against a single SIMD vector of B.
7601 for(
size_t i=ii; i<iend; ++i )
7603 const size_t kbegin(
max( ( IsUpper<MT4>::value )?(
max( i, kk ) ):( kk ),
7604 ( IsLower<MT5>::value )?(
max( j, kk ) ):( kk ) ) );
7605 const size_t kend (
min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7606 ( IsUpper<MT5>::value )?(
min( j+
IT::size, ktmp ) ):( ktmp ) ) );
7610 for(
size_t k=kbegin; k<kend; ++k ) {
7611 const IntrinsicType a1(
set( A(i,k) ) );
7612 xmm1 = xmm1 + a1 * B.load(k,j);
7615 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// Vectorized large-matrix subtraction-assignment kernel for a column-major
// target (DenseMatrix<MT3,true>). No dedicated large kernel is implemented
// here: the call is forwarded to selectSmallSubAssignKernel() with the
// unwrapped target.
7638 template<
typename MT3
7642 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7643 selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
7645 selectSmallSubAssignKernel( ~C, A, B, scalar );
7663 template<
typename MT3
7667 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7668 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7670 selectLargeSubAssignKernel( C, A, B, scalar );
7689 template<
typename MT3
7693 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7694 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7696 if( IsTriangular<MT4>::value ) {
7698 strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7701 else if( IsTriangular<MT5>::value ) {
7703 strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7707 sgemm( C, A, B, -scalar, 1.0F );
7728 template<
typename MT3
7732 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7733 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7735 if( IsTriangular<MT4>::value ) {
7737 dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7740 else if( IsTriangular<MT5>::value ) {
7742 dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7746 dgemm( C, A, B, -scalar, 1.0 );
7767 template<
typename MT3
7771 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7772 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7774 if( IsTriangular<MT4>::value ) {
7776 ctrmm( tmp, A, CblasLeft,
7777 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7778 complex<float>( scalar, 0.0F ) );
7781 else if( IsTriangular<MT5>::value ) {
7783 ctrmm( tmp, B, CblasRight,
7784 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7785 complex<float>( scalar, 0.0F ) );
7789 cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS kernel for the subtraction assignment.
// The scaling factor is handed to ztrmm as complex<double>, consistent with the
// zgemm call below; constructing it as complex<float> would round the scalar to
// single precision before the double-precision routine receives it.
7810 template<
typename MT3
7814 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7815 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
7817 if( IsTriangular<MT4>::value ) {
7819 ztrmm( tmp, A, CblasLeft,
7820 ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7821 complex<double>( scalar, 0.0 ) );
7824 else if( IsTriangular<MT5>::value ) {
7826 ztrmm( tmp, B, CblasRight,
7827 ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7828 complex<double>( scalar, 0.0 ) );
7832 zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
7852 template<
typename MT >
7853 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
7854 subAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
7863 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7864 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7866 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
7868 else if( IsSymmetric<MT1>::value )
7902 template<
typename MT
7904 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7905 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
7912 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7913 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7915 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
7918 else if( left.columns() == 0UL ) {
7952 template<
typename MT
7954 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7955 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
7959 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
7971 const TmpType tmp( rhs );
7990 template<
typename MT >
7991 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
7992 smpAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
8001 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8002 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8004 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8006 else if( IsSymmetric<MT1>::value )
8028 template<
typename MT
8030 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8031 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
8038 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8039 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8041 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
8073 template<
typename MT >
8074 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8075 smpAddAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
8084 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8085 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8087 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8089 else if( IsSymmetric<MT1>::value )
8115 template<
typename MT
8117 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8118 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
8125 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8126 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8128 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
8160 template<
typename MT >
8161 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8162 smpSubAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
8171 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8172 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8174 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8176 else if( IsSymmetric<MT1>::value )
8244 template<
typename T1
8246 inline const DMatDMatMultExpr<T1,T2>
8252 throw std::invalid_argument(
"Matrix sizes do not match" );
8269 template<
typename MT1,
typename MT2 >
8287 template<
typename MT1,
typename MT2 >
8289 :
public Columns<MT2>
8305 template<
typename MT1,
typename MT2 >
8307 :
public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
8323 template<
typename MT1,
typename MT2 >
8325 :
public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
8341 template<
typename MT1,
typename MT2 >
8343 :
public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8344 , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
8360 template<
typename MT1,
typename MT2 >
8362 :
public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
8378 template<
typename MT1,
typename MT2 >
8380 :
public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
8396 template<
typename MT1,
typename MT2 >
8398 :
public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8399 , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
8415 template<
typename MT1,
typename MT2,
typename VT >
8420 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8421 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
8422 IsDenseVector<VT>::value && IsColumnVector<VT>::value
8423 ,
typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
8424 , INVALID_TYPE >::Type Type;
8433 template<
typename MT1,
typename MT2,
typename VT >
8438 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8439 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
8440 IsSparseVector<VT>::value && IsColumnVector<VT>::value
8441 ,
typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
8442 , INVALID_TYPE >::Type Type;
8451 template<
typename VT,
typename MT1,
typename MT2 >
8456 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
8457 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8458 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
8459 ,
typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8460 , INVALID_TYPE >::Type Type;
8469 template<
typename VT,
typename MT1,
typename MT2 >
8474 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
8475 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8476 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
8477 ,
typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8478 , INVALID_TYPE >::Type Type;
8487 template<
typename MT1,
typename MT2,
bool AF >
8492 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
8493 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
8502 template<
typename MT1,
typename MT2 >
8507 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
8516 template<
typename MT1,
typename MT2 >
8521 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:145
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for mathematical functions.
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:8247
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:142
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:320
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:416
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:307
Header file for the IsSame and IsStrictlySame type traits.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:311
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the And class template.
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:259
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:317
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:134
Header file for the IsUniLower type trait.
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2503
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:143
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:436
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
Header file for the Or class template.
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > > >::Type store(T *address, const sse_int16_t &value)
Aligned store of a vector of 2-byte integral values.
Definition: Store.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:312
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:480
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
Header file for the Not class template.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold. This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold. This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
Header file for BLAS level 3 functions.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Header file for the IsStrictlyTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:348
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:490
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:309
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:458
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type load(const T *address)
Loads a vector of 2-byte integral values.
Definition: Load.h:79
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:446
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:363
System settings for the BLAS mode.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:499
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:308
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:314
Constraint on the data type.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:426
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:310
Header file for the HasMutableDataAccess type trait.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:326
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:323
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:313
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:937
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:140
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
Header file for the IsTrue value trait.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:144
Header file for the IsUpper type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:141
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:500
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:470
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849