6 #ifndef LAPACK_FLOPS_HH
7 #define LAPACK_FLOPS_HH
10 #include "blas/flops.hh"
27 inline double fmuls_getrf(
double m,
double n)
30 ? (0.5*m*n*n - 1./6*n*n*n + 0.5*m*n - 0.5*n*n + 2/3.*n)
31 : (0.5*n*m*m - 1./6*m*m*m + 0.5*n*m - 0.5*m*m + 2/3.*m);
34 inline double fadds_getrf(
double m,
double n)
37 ? (0.5*m*n*n - 1./6*n*n*n - 0.5*m*n + 1./6*n)
38 : (0.5*n*m*m - 1./6*m*m*m - 0.5*n*m + 1./6*m);
42 inline double fmuls_getri(
double n)
43 {
return 2/3.*n*n*n + 0.5*n*n + 5./6*n; }
45 inline double fadds_getri(
double n)
46 {
return 2/3.*n*n*n - 1.5*n*n + 5./6*n; }
49 inline double fmuls_getrs(
double n,
double nrhs)
52 inline double fadds_getrs(
double n,
double nrhs)
53 {
return nrhs*n*(n - 1); }
56 inline double fmuls_potrf(
double n)
57 {
return 1./6*n*n*n + 0.5*n*n + 1./3.*n; }
59 inline double fadds_potrf(
double n)
60 {
return 1./6*n*n*n - 1./6*n; }
63 inline double fmuls_potri(
double n)
64 {
return 1./3.*n*n*n + n*n + 2/3.*n; }
66 inline double fadds_potri(
double n)
67 {
return 1./3.*n*n*n - 0.5*n*n + 1./6*n; }
70 inline double fmuls_potrs(
double n,
double nrhs)
71 {
return nrhs*n*(n + 1); }
73 inline double fadds_potrs(
double n,
double nrhs)
74 {
return nrhs*n*(n - 1); }
77 inline double fmuls_pbtrf(
double n,
double k)
78 {
return n*(1./2.*k*k + 3./2.*k + 1) - 1./3.*k*k*k - k*k - 2./3.*k; }
80 inline double fadds_pbtrf(
double n,
double k)
81 {
return n*(1./2.*k*k + 1./2.*k) - 1./3.*k*k*k - 1./2.*k*k - 1./6.*k; }
84 inline double fmuls_pbtrs(
double n,
double nrhs,
double k)
85 {
return nrhs*(2*n*k + 2*n - k*k - k); }
87 inline double fadds_pbtrs(
double n,
double nrhs,
double k)
88 {
return nrhs*(2*n*k - k*k - k); }
91 inline double fmuls_sytrf(
double n)
92 {
return 1/6.*n*n*n + 0.5*n*n + 10/3.*n; }
94 inline double fadds_sytrf(
double n)
95 {
return 1/6.*n*n*n - 1/6.*n; }
98 inline double fmuls_sytri(
double n)
99 {
return 1/3.*n*n*n + n*n + 2/3.*n; }
101 inline double fadds_sytri(
double n)
102 {
return 1/3.*n*n*n - 1/3.*n; }
105 inline double fmuls_sytrs(
double n,
double nrhs)
106 {
return nrhs*n*(n + 1); }
108 inline double fadds_sytrs(
double n,
double nrhs)
109 {
return nrhs*n*(n - 1); }
112 inline double fmuls_geqrf(
double m,
double n)
115 ? (m*n*n - 1./3.*n*n*n + m*n + 0.5*n*n + 23./6*n)
116 : (n*m*m - 1./3.*m*m*m + 2*n*m - 0.5*m*m + 23./6*m);
119 inline double fadds_geqrf(
double m,
double n)
122 ? (m*n*n - 1./3.*n*n*n + 0.5*n*n + 5./6*n)
123 : (n*m*m - 1./3.*m*m*m + n*m - 0.5*m*m + 5./6*m);
128 inline double fmuls_geqrt(
double m,
double n)
131 inline double fadds_geqrt(
double m,
double n)
135 inline double fmuls_geqlf(
double m,
double n)
136 {
return fmuls_geqrf(m, n); }
138 inline double fadds_geqlf(
double m,
double n)
139 {
return fadds_geqrf(m, n); }
142 inline double fmuls_gerqf(
double m,
double n)
145 ? (m*n*n - 1./3.*n*n*n + m*n + 0.5*n*n + 29./6*n)
146 : (n*m*m - 1./3.*m*m*m + 2*n*m - 0.5*m*m + 29./6*m);
149 inline double fadds_gerqf(
double m,
double n)
152 ? (m*n*n - 1./3.*n*n*n + m*n - 0.5*n*n + 5./6*n)
153 : (n*m*m - 1./3.*m*m*m + 0.5*m*m + 5./6*m);
157 inline double fmuls_gelqf(
double m,
double n)
158 {
return fmuls_gerqf(m, n); }
160 inline double fadds_gelqf(
double m,
double n)
161 {
return fadds_gerqf(m, n); }
164 inline double fmuls_ungqr(
double m,
double n,
double k)
165 {
return 2*m*n*k - (m + n)*k*k + 2/3.*k*k*k + 2*n*k - k*k - 5./3.*k; }
167 inline double fadds_ungqr(
double m,
double n,
double k)
168 {
return 2*m*n*k - (m + n)*k*k + 2/3.*k*k*k + n*k - m*k + 1./3.*k; }
171 inline double fmuls_ungql(
double m,
double n,
double k)
172 {
return fmuls_ungqr(m, n, k); }
174 inline double fadds_ungql(
double m,
double n,
double k)
175 {
return fadds_ungqr(m, n, k); }
178 inline double fmuls_ungrq(
double m,
double n,
double k)
179 {
return 2*m*n*k - (m + n)*k*k + 2/3.*k*k*k + m*k + n*k - k*k - 2/3.*k; }
181 inline double fadds_ungrq(
double m,
double n,
double k)
182 {
return 2*m*n*k - (m + n)*k*k + 2/3.*k*k*k + m*k - n*k + 1./3.*k; }
185 inline double fmuls_unglq(
double m,
double n,
double k)
186 {
return fmuls_ungrq(m, n, k); }
188 inline double fadds_unglq(
double m,
double n,
double k)
189 {
return fadds_ungrq(m, n, k); }
192 inline double fmuls_unmqr(lapack::Side side,
double m,
double n,
double k)
194 return (side == lapack::Side::Left)
195 ? (2*n*m*k - n*k*k + 2*n*k)
196 : (2*n*m*k - m*k*k + m*k + n*k - 0.5*k*k + 0.5*k);
199 inline double fadds_unmqr(lapack::Side side,
double m,
double n,
double k)
201 return (side == lapack::Side::Left)
202 ? (2*n*m*k - n*k*k + n*k)
203 : (2*n*m*k - m*k*k + m*k);
207 inline double fmuls_unmql(lapack::Side side,
double m,
double n,
double k)
208 {
return fmuls_unmqr(side, m, n, k); }
210 inline double fadds_unmql(lapack::Side side,
double m,
double n,
double k)
211 {
return fadds_unmqr(side, m, n, k); }
214 inline double fmuls_unmrq(lapack::Side side,
double m,
double n,
double k)
215 {
return fmuls_unmqr(side, m, n, k); }
217 inline double fadds_unmrq(lapack::Side side,
double m,
double n,
double k)
218 {
return fadds_unmqr(side, m, n, k); }
221 inline double fmuls_unmlq(lapack::Side side,
double m,
double n,
double k)
222 {
return fmuls_unmqr(side, m, n, k); }
224 inline double fadds_unmlq(lapack::Side side,
double m,
double n,
double k)
225 {
return fadds_unmqr(side, m, n, k); }
228 inline double fmuls_trtri(
double n)
229 {
return 1./6*n*n*n + 0.5*n*n + 1./3.*n; }
231 inline double fadds_trtri(
double n)
232 {
return 1./6*n*n*n - 0.5*n*n + 1./3.*n; }
235 inline double fmuls_gehrd(
double n)
236 {
return 5./3.*n*n*n + 0.5*n*n - 7./6*n; }
238 inline double fadds_gehrd(
double n)
239 {
return 5./3.*n*n*n - n*n - 2/3.*n; }
242 inline double fmuls_sytrd(
double n)
243 {
return 2/3.*n*n*n + 2.5*n*n - 1./6*n; }
245 inline double fadds_sytrd(
double n)
246 {
return 2/3.*n*n*n + n*n - 8./3.*n; }
248 inline double fmuls_hetrd(
double n)
249 {
return fmuls_sytrd(n); }
251 inline double fadds_hetrd(
double n)
252 {
return fadds_sytrd(n); }
255 inline double fmuls_gebrd(
double m,
double n)
258 ? (2*m*n*n - 2/3.*n*n*n + 2*n*n + 20./3.*n)
259 : (2*n*m*m - 2/3.*m*m*m + 2*m*m + 20./3.*m);
262 inline double fadds_gebrd(
double m,
double n)
265 ? (2*m*n*n - 2/3.*n*n*n + n*n - m*n + 5./3.*n)
266 : (2*n*m*m - 2/3.*m*m*m + m*m - n*m + 5./3.*m);
270 inline double fmuls_larfg(
double n)
273 inline double fadds_larfg(
double n)
277 inline double fmuls_geadd(
double m,
double n)
280 inline double fadds_geadd(
double m,
double n)
284 inline double fmuls_lauum(
double n)
285 {
return fmuls_potri(n) - fmuls_trtri(n); }
287 inline double fadds_lauum(
double n)
288 {
return fadds_potri(n) - fadds_trtri(n); }
291 inline double fmuls_lange(lapack::Norm norm,
double m,
double n)
292 {
return norm == lapack::Norm::Fro ? m*n : 0; }
294 inline double fadds_lange(lapack::Norm norm,
double m,
double n)
297 case lapack::Norm::One:
return (m-1)*n;
298 case lapack::Norm::Inf:
return (n-1)*m;
299 case lapack::Norm::Fro:
return m*n-1;
305 inline double fmuls_lanhe(lapack::Norm norm,
double n)
306 {
return norm == lapack::Norm::Fro ? n*(n+1)/2 : 0; }
308 inline double fadds_lanhe(lapack::Norm norm,
double n)
311 case lapack::Norm::One:
return (n-1)*n;
312 case lapack::Norm::Inf:
return (n-1)*n;
313 case lapack::Norm::Fro:
return n*(n+1)/2-1;
323 template<
typename T >
325 public blas::Gbyte<T>
334 template<
typename T >
336 public blas::Gflop<T>
339 using blas::Gflop<T>::mul_ops;
340 using blas::Gflop<T>::add_ops;
343 static double gesv(
double n,
double nrhs)
344 {
return getrf(n, n) + getrs(n, nrhs); }
346 static double getrf(
double m,
double n)
347 {
return 1e-9 * (mul_ops*fmuls_getrf(m, n) + add_ops*fadds_getrf(m, n)); }
349 static double getri(
double n)
350 {
return 1e-9 * (mul_ops*fmuls_getri(n) + add_ops*fadds_getri(n)); }
352 static double getrs(
double n,
double nrhs)
353 {
return 1e-9 * (mul_ops*fmuls_getrs(n, nrhs) + add_ops*fadds_getrs(n, nrhs)); }
356 static double posv(
double n,
double nrhs)
357 {
return potrf(n) + potrs(n, nrhs); }
359 static double potrf(
double n)
360 {
return 1e-9 * (mul_ops*fmuls_potrf(n) + add_ops*fadds_potrf(n)); }
362 static double potri(
double n)
363 {
return 1e-9 * (mul_ops*fmuls_potri(n) + add_ops*fadds_potri(n)); }
365 static double potrs(
double n,
double nrhs)
366 {
return 1e-9 * (mul_ops*fmuls_potrs(n, nrhs) + add_ops*fadds_potrs(n, nrhs)); }
369 static double pbsv(
double n,
double nrhs,
double k)
370 {
return pbtrf(n, k) + pbtrs(n, nrhs, k); }
372 static double pbtrf(
double n,
double k)
373 {
return 1e-9 * (mul_ops*fmuls_pbtrf(n, k) + add_ops*fadds_pbtrf(n, k)); }
375 static double pbtrs(
double n,
double nrhs,
double k)
376 {
return 1e-9 * (mul_ops*fmuls_pbtrs(n, nrhs, k) + add_ops*fadds_pbtrs(n, nrhs, k)); }
379 static double sysv(
double n,
double nrhs)
380 {
return sytrf(n) + sytrs(n, nrhs); }
382 static double sytrf(
double n)
383 {
return 1e-9 * (mul_ops*fmuls_sytrf(n) + add_ops*fadds_sytrf(n)); }
385 static double sytri(
double n)
386 {
return 1e-9 * (mul_ops*fmuls_sytri(n) + add_ops*fadds_sytri(n)); }
388 static double sytrs(
double n,
double nrhs)
389 {
return 1e-9 * (mul_ops*fmuls_sytrs(n, nrhs) + add_ops*fadds_sytrs(n, nrhs)); }
391 static double hesv(
double n,
double nrhs)
392 {
return sysv(n, nrhs); }
394 static double hetrf(
double n)
397 static double hetri(
double n)
400 static double hetrs(
double n,
double nrhs)
401 {
return sytrs(n, nrhs); }
404 static double geqrf(
double m,
double n)
405 {
return 1e-9 * (mul_ops*fmuls_geqrf(m, n) + add_ops*fadds_geqrf(m, n)); }
407 static double geqrt(
double m,
double n)
408 {
return 1e-9 * (mul_ops*fmuls_geqrt(m, n) + add_ops*fadds_geqrt(m, n)); }
410 static double geqlf(
double m,
double n)
411 {
return 1e-9 * (mul_ops*fmuls_geqlf(m, n) + add_ops*fadds_geqlf(m, n)); }
413 static double gerqf(
double m,
double n)
414 {
return 1e-9 * (mul_ops*fmuls_gerqf(m, n) + add_ops*fadds_gerqf(m, n)); }
416 static double gelqf(
double m,
double n)
417 {
return 1e-9 * (mul_ops*fmuls_gelqf(m, n) + add_ops*fadds_gelqf(m, n)); }
420 static double ungqr(
double m,
double n,
double k)
421 {
return 1e-9 * (mul_ops*fmuls_ungqr(m, n, k) + add_ops*fadds_ungqr(m, n, k)); }
423 static double orgqr(
double m,
double n,
double k)
424 {
return ungqr(m, n, k); }
426 static double ungql(
double m,
double n,
double k)
427 {
return 1e-9 * (mul_ops*fmuls_ungql(m, n, k) + add_ops*fadds_ungql(m, n, k)); }
429 static double orgql(
double m,
double n,
double k)
430 {
return ungql(m, n, k); }
432 static double ungrq(
double m,
double n,
double k)
433 {
return 1e-9 * (mul_ops*fmuls_ungrq(m, n, k) + add_ops*fadds_ungrq(m, n, k)); }
435 static double orgrq(
double m,
double n,
double k)
436 {
return ungrq(m, n, k); }
438 static double unglq(
double m,
double n,
double k)
439 {
return 1e-9 * (mul_ops*fmuls_unglq(m, n, k) + add_ops*fadds_unglq(m, n, k)); }
441 static double orglq(
double m,
double n,
double k)
442 {
return unglq(m, n, k); }
445 static double unmqr(lapack::Side side,
double m,
double n,
double k)
446 {
return 1e-9 * (mul_ops*fmuls_unmqr(side, m, n, k) + add_ops*fadds_unmqr(side, m, n, k)); }
448 static double ormqr(lapack::Side side,
double m,
double n,
double k)
449 {
return unmqr(side, m, n, k); }
451 static double unmql(lapack::Side side,
double m,
double n,
double k)
452 {
return 1e-9 * (mul_ops*fmuls_unmql(side, m, n, k) + add_ops*fadds_unmql(side, m, n, k)); }
454 static double ormql(lapack::Side side,
double m,
double n,
double k)
455 {
return unmql(side, m, n, k); }
457 static double unmrq(lapack::Side side,
double m,
double n,
double k)
458 {
return 1e-9 * (mul_ops*fmuls_unmrq(side, m, n, k) + add_ops*fadds_unmrq(side, m, n, k)); }
460 static double ormrq(lapack::Side side,
double m,
double n,
double k)
461 {
return unmrq(side, m, n, k); }
463 static double unmlq(lapack::Side side,
double m,
double n,
double k)
464 {
return 1e-9 * (mul_ops*fmuls_unmlq(side, m, n, k) + add_ops*fadds_unmlq(side, m, n, k)); }
466 static double ormlq(lapack::Side side,
double m,
double n,
double k)
467 {
return unmlq(side, m, n, k); }
470 static double gels(
double m,
double n,
double nrhs)
472 blas::Side left = blas::Side::Left;
474 ? geqrf(m, n) + unmqr(left, m, nrhs, n) + blas::Gflop<T>::trsm(left, n, nrhs)
475 : gelqf(m, n) + unmlq(left, n, nrhs, m) + blas::Gflop<T>::trsm(left, m, nrhs));
479 static double trtri(
double n)
480 {
return 1e-9 * (mul_ops*fmuls_trtri(n) + add_ops*fadds_trtri(n)); }
483 static double gehrd(
double n)
484 {
return 1e-9 * (mul_ops*fmuls_gehrd(n) + add_ops*fadds_gehrd(n)); }
487 static double hetrd(
double n)
488 {
return 1e-9 * (mul_ops*fmuls_sytrd(n) + add_ops*fadds_sytrd(n)); }
490 static double sytrd(
double n)
494 static double gebrd(
double m,
double n)
495 {
return 1e-9 * (mul_ops*fmuls_gebrd(m, n) + add_ops*fadds_gebrd(m, n)); }
498 static double larfg(
double n)
499 {
return 1e-9 * (mul_ops*fmuls_larfg(n) + add_ops*fadds_larfg(n)); }
502 static double geadd(
double m,
double n)
503 {
return 1e-9 * (mul_ops*fmuls_geadd(m, n) + add_ops*fadds_geadd(m, n)); }
506 static double lauum(
double n)
507 {
return 1e-9 * (mul_ops*fmuls_lauum(n) + add_ops*fadds_lauum(n)); }
510 static double lange(lapack::Norm norm,
double m,
double n)
511 {
return 1e-9 * (mul_ops*fmuls_lange(norm, m, n) + add_ops*fadds_lange(norm, m, n)); }
513 static double lanhe(lapack::Norm norm,
double n)
514 {
return 1e-9 * (mul_ops*fmuls_lanhe(norm, n) + add_ops*fadds_lanhe(norm, n)); }
516 static double lansy(lapack::Norm norm,
double n)
517 {
return lanhe(norm, n); }
522 #endif // LAPACK_FLOPS_HH