hmbdc
simplify-high-performance-messaging-programming
StatHistogram.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 #include "hmbdc/Exception.hpp"
5 #include <map>
6 #include <vector>
7 #include <utility>
8 #include <limits>
9 #include <algorithm>
10 #include <stdexcept>
11 #include <ext/mt_allocator.h>
12 
13 namespace hmbdc { namespace numeric {
14 namespace stathistogram_detail {
15 using namespace std;
16 
/**
 * @brief write the requested percentile values of a histogram to a stream
 * @details output has the form "<pct>%=<value>," for every requested percentage,
 * followed by "sample=<sampleSize>"
 *
 * @tparam Hist type providing report(percentages) whose result is indexable
 * @param os stream to write to
 * @param hist histogram to query
 * @param sampleSize number of samples printed at the end of the line
 * @param percentages percentages passed on to hist.report()
 */
template <typename Hist>
static
void display(std::ostream& os, Hist const& hist, std::size_t sampleSize
    , std::vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) {
    auto const values = hist.report(percentages);
    std::size_t slot = 0;
    for (auto const pct : percentages) {
        os << pct << "%=" << values[slot] << ',';
        ++slot;
    }
    os << "sample=" << sampleSize;
}
28 
29 };
30 
31 /**
32  * @brief collect sample values and keep a histogram for reporting percentiles
33  * @details samples are ranked in ascending order, so the top percentages are the smaller values
34  *
35  * @tparam T value type that supports the less-than operator
36  * @tparam DETAILED if false, the samples are kept in coarser-grained buckets and the
37  * class runs faster
38  */
39 template <typename T, bool DETAILED = true>
41 : private StatHistogramBase {
43  : threshold_(numeric_limits<T>::max())
44  , worst_(numeric_limits<T>::min())
45  , sampleSize_(0ul)
46  {}
47 
48  explicit StatHistogram(T threshold)
49  : threshold_(threshold)
50  , worst_(numeric_limits<T>::min())
51  , sampleSize_(0ul){}
52 
53  bool add(T sample) {
54  ++sampleSize_;
55  if (sample < threshold_)
56  buckets_[sample]++;
57  else
58  buckets_[threshold_]++;
59 
60  if (sample > worst_) {
61  worst_ = sample;
62  return true;
63  }
64  return false;
65  }
66 
67  size_t sampleSize() const {
68  return sampleSize_;
69  }
70 
71  StatHistogram<T>& operator += (StatHistogram<T> const& other) {
72  if (threshold_ == other.threshold_) {
73  for (auto const& v : other.buckets_) {
74  buckets_[v.first] += v.second;
75  }
76  worst_ = max(worst_, other.worst_);
77  } else {
78  HMBDC_THROW(runtime_error, "histogram collection parameters mismatch - failed");
79  }
80  sampleSize_ += other.sampleSize_;
81  return *this;
82  }
83 
84  vector<T> report(vector<float> percentages
85  = {0, 1, 10, 50, 90, 99, 100}) const {
86  vector<T> p(percentages.size());
87  if (!buckets_.empty() && !p.empty()) {
88  *p.begin() = buckets_.begin()->first;
89  *p.rbegin() = worst_;
90  }
91  size_t count = 0;
92  size_t perIndex = 1;
93  for(auto& i : buckets_) {
94  count += i.second;
95  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
96  if (count * 100ul >= percentages[j] * sampleSize_) {
97  p[j] = i.first;
98  perIndex++;
99  } else {
100  break;
101  }
102  }
103  }
104 
105  return p;
106  }
107 
108  void display(ostream& os
109  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
110  StatHistogramBase::display(os, *this, sampleSize_, percentages);
111  }
112 
113  friend
114  ostream& operator << (ostream& os, StatHistogram const& hist) {
115  hist.display(os);
116  return os;
117  }
118 private:
119 
120  using Buckets = map<T, size_t, less<T>
121  , __gnu_cxx::__mt_alloc<pair<const T, size_t>>
122  >;
123  Buckets buckets_;
124  T threshold_;
125  T worst_;
126  size_t sampleSize_;
127 };
128 
129 template <typename T>
130 struct StatHistogram<T, false>
131 : private StatHistogramBase {
133  T thresholdMin
134  , T thresholdMax
135  , size_t bucketCount = 1000u)
136  : thresholdMin_(thresholdMin)
137  , thresholdMax_(thresholdMax)
138  , best_(numeric_limits<T>::max())
139  , worst_(numeric_limits<T>::min())
140  , sampleSize_(0ul)
141  , unit_((thresholdMax - thresholdMin) / bucketCount)
142  , buckets_(bucketCount + 1) {
143  if (thresholdMax <= thresholdMin) {
144  HMBDC_THROW(runtime_error, "thresholdMax <= thresholdMin");
145  }
146  }
147 
148  int add(T sample) {
149  ++sampleSize_;
150 
151  if (sample < thresholdMin_)
152  buckets_[0]++;
153  else if (sample < thresholdMax_)
154  buckets_[(sample - thresholdMin_) / unit_]++;
155  else
156  buckets_[buckets_.size() - 1]++;
157 
158  auto res = 0;
159  if (sample < best_) {
160  best_ = sample;
161  res = -1;
162  }
163  if (sample > worst_) {
164  worst_ = sample;
165  res = 1;
166  }
167 
168  return res;
169  }
170 
171  size_t sampleSize() const {
172  return sampleSize_;
173  }
174 
175  StatHistogram<T, false>& operator += (StatHistogram<T, false> const& other) {
176  if (thresholdMax_ == other.thresholdMax_ &&
177  thresholdMin_ == other.thresholdMin_ &&
178  buckets_.size() == other.buckets_.size()) {
179  for (auto i = 0u; i < buckets_.size(); ++i) {
180  buckets_[i] += other.buckets_[i];
181  }
182  worst_ = max(worst_, other.worst_);
183  best_ = min(best_, other.best_);
184  sampleSize_ += other.sampleSize_;
185  } else {
186  HMBDC_THROW(runtime_error, "thresholds or bucketCount mismatch - failed");
187  }
188  return *this;
189  }
190 
191  vector<T> report(vector<float> percentages
192  = {0, 1, 10, 50, 90, 99, 100}) const {
193 
194  vector<T> p(percentages.size());
195  if (sampleSize_ && !p.empty()) {
196  *p.begin() = best_;
197  *p.rbegin() = worst_;
198  size_t count = 0;
199  auto val = thresholdMin_;
200  size_t perIndex = 1;
201  for(auto& i : buckets_) {
202  count += i;
203  val += unit_;
204  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
205  if (count * 100ul >= percentages[j] * sampleSize_) {
206  p[j] = min(val, worst_);
207  perIndex++;
208  } else {
209  break;
210  }
211  }
212  }
213  }
214 
215  return p;
216  }
217 
218  void display(ostream& os
219  , vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
220  StatHistogramBase::display(os, *this, sampleSize_, percentages);
221  }
222 
223  friend
224  ostream& operator << (ostream& os, StatHistogram const& hist) {
225  hist.display(os);
226  return os;
227  }
228 
229 private:
230  T thresholdMin_;
231  T thresholdMax_;
232  T best_;
233  T worst_;
234  size_t sampleSize_;
235  using Buckets = vector<size_t>;
236  T unit_;
237  Buckets buckets_;
238 };
239 
240 } //stathistogram_detail
241 
242 template <typename T, bool DETAILED = true>
244 }}
245 
Definition: TypedString.hpp:84
collect sample values and keep histogram for top percentages
Definition: StatHistogram.hpp:40
Definition: Base.hpp:12