hmbdc
simplify-high-performance-messaging-programming
StatHistogram.hpp
1 #include "hmbdc/Copyright.hpp"
2 #pragma once
3 
4 #include "hmbdc/Exception.hpp"
5 #include <map>
6 #include <vector>
7 #include <utility>
8 #include <limits>
9 #include <algorithm>
10 #include <stdexcept>
11 #include <ext/mt_allocator.h>
12 
13 namespace hmbdc { namespace numeric {
14 namespace stathistogram_detail {
15 
/**
 * @brief formatting helper shared by the histogram flavors in this file
 */
struct StatHistogramBase {
    /**
     * @brief print the values a histogram reports for the given percentages
     * @details writes "<percentage>%=<value>," for each entry followed by
     * "sample=<sampleSize>"; no trailing newline is written
     *
     * @tparam Hist any type providing report(percentages) whose result is
     * indexable and exposes size()
     * @param os stream to write to
     * @param hist histogram to query
     * @param sampleSize number printed after "sample="
     * @param percentages percentages handed to hist.report()
     */
    template <typename Hist>
    static
    void display(std::ostream& os, Hist const& hist, size_t sampleSize
        , std::vector<float> const& percentages = {0, 1, 10, 50, 90, 99, 100}) {
        auto const values = hist.report(percentages);
        // never index past what report() actually produced
        auto const count = std::min<size_t>(values.size(), percentages.size());
        for (size_t i = 0; i < count; ++i) {
            os << percentages[i] << "%=" << values[i] << ',';
        }
        os << "sample=" << sampleSize;
    }
};
29 
30 /**
31  * @brief collect sample values and keep histogram for top percentages
32  * @details top values are the smaller values
33  *
34  * @tparam T value type that supports less than operator
35  * @tparam DETAILED if false, the samples are kept in coarser grain and the class's
36  * speed performance is better
37  */
38 template <typename T, bool DETAILED = true>
40 : private StatHistogramBase {
42  : threshold_(std::numeric_limits<T>::max())
43  , worst_(std::numeric_limits<T>::min())
44  , sampleSize_(0ul)
45  {}
46 
47  explicit StatHistogram(T threshold)
48  : threshold_(threshold)
49  , worst_(std::numeric_limits<T>::min())
50  , sampleSize_(0ul){}
51 
52  bool add(T sample) {
53  ++sampleSize_;
54  if (sample < threshold_)
55  buckets_[sample]++;
56  else
57  buckets_[threshold_]++;
58 
59  if (sample > worst_) {
60  worst_ = sample;
61  return true;
62  }
63  return false;
64  }
65 
66  size_t sampleSize() const {
67  return sampleSize_;
68  }
69 
70  StatHistogram<T>& operator += (StatHistogram<T> const& other) {
71  if (threshold_ == other.threshold_) {
72  for (auto const& v : other.buckets_) {
73  buckets_[v.first] += v.second;
74  }
75  worst_ = std::max(worst_, other.worst_);
76  } else {
77  HMBDC_THROW(std::runtime_error, "histogram collection parameters mismatch - failed");
78  }
79  sampleSize_ += other.sampleSize_;
80  return *this;
81  }
82 
83  std::vector<T> report(std::vector<float> percentages
84  = {0, 1, 10, 50, 90, 99, 100}) const {
85  std::vector<T> p(percentages.size());
86  if (!buckets_.empty() && !p.empty()) {
87  *p.begin() = buckets_.begin()->first;
88  *p.rbegin() = worst_;
89  }
90  size_t count = 0;
91  size_t perIndex = 1;
92  for(auto& i : buckets_) {
93  count += i.second;
94  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
95  if (count * 100ul >= percentages[j] * sampleSize_) {
96  p[j] = i.first;
97  perIndex++;
98  } else {
99  break;
100  }
101  }
102  }
103 
104  return p;
105  }
106 
107  void display(std::ostream& os
108  , std::vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
109  StatHistogramBase::display(os, *this, sampleSize_, percentages);
110  }
111 
112  friend
113  std::ostream& operator << (std::ostream& os, StatHistogram const& hist) {
114  hist.display(os);
115  return os;
116  }
117 private:
118 
119  using Buckets = std::map<T, size_t, std::less<T>
120  , __gnu_cxx::__mt_alloc<std::pair<const T, size_t>>
121  >;
122  Buckets buckets_;
123  T threshold_;
124  T worst_;
125  size_t sampleSize_;
126 };
127 
128 template <typename T>
129 struct StatHistogram<T, false>
130 : private StatHistogramBase {
132  T thresholdMin
133  , T thresholdMax
134  , size_t bucketCount = 1000u)
135  : thresholdMin_(thresholdMin)
136  , thresholdMax_(thresholdMax)
137  , best_(std::numeric_limits<T>::max())
138  , worst_(std::numeric_limits<T>::min())
139  , sampleSize_(0ul)
140  , unit_((thresholdMax - thresholdMin) / bucketCount)
141  , buckets_(bucketCount + 1) {
142  if (thresholdMax <= thresholdMin) {
143  HMBDC_THROW(std::runtime_error, "thresholdMax <= thresholdMin");
144  }
145  }
146 
147  int add(T sample) {
148  ++sampleSize_;
149 
150  if (sample < thresholdMin_)
151  buckets_[0]++;
152  else if (sample < thresholdMax_)
153  buckets_[(sample - thresholdMin_) / unit_]++;
154  else
155  buckets_[buckets_.size() - 1]++;
156 
157  auto res = 0;
158  if (sample < best_) {
159  best_ = sample;
160  res = -1;
161  }
162  if (sample > worst_) {
163  worst_ = sample;
164  res = 1;
165  }
166 
167  return res;
168  }
169 
170  size_t sampleSize() const {
171  return sampleSize_;
172  }
173 
174  StatHistogram<T, false>& operator += (StatHistogram<T, false> const& other) {
175  if (thresholdMax_ == other.thresholdMax_ &&
176  thresholdMin_ == other.thresholdMin_ &&
177  buckets_.size() == other.buckets_.size()) {
178  for (auto i = 0u; i < buckets_.size(); ++i) {
179  buckets_[i] += other.buckets_[i];
180  }
181  worst_ = std::max(worst_, other.worst_);
182  best_ = std::min(best_, other.best_);
183  sampleSize_ += other.sampleSize_;
184  } else {
185  HMBDC_THROW(std::runtime_error, "thresholds or bucketCount mismatch - failed");
186  }
187  return *this;
188  }
189 
190  std::vector<T> report(std::vector<float> percentages
191  = {0, 1, 10, 50, 90, 99, 100}) const {
192 
193  std::vector<T> p(percentages.size());
194  if (sampleSize_ && !p.empty()) {
195  *p.begin() = best_;
196  *p.rbegin() = worst_;
197  size_t count = 0;
198  auto val = thresholdMin_;
199  size_t perIndex = 1;
200  for(auto& i : buckets_) {
201  count += i;
202  val += unit_;
203  for (auto j = perIndex; j < percentages.size() - 1; ++j) {
204  if (count * 100ul >= percentages[j] * sampleSize_) {
205  p[j] = std::min(val, worst_);
206  perIndex++;
207  } else {
208  break;
209  }
210  }
211  }
212  }
213 
214  return p;
215  }
216 
217  void display(std::ostream& os
218  , std::vector<float> percentages = {0, 1, 10, 50, 90, 99, 100}) const {
219  StatHistogramBase::display(os, *this, sampleSize_, percentages);
220  }
221 
222  friend
223  std::ostream& operator << (std::ostream& os, StatHistogram const& hist) {
224  hist.display(os);
225  return os;
226  }
227 
228 private:
229  T thresholdMin_;
230  T thresholdMax_;
231  T best_;
232  T worst_;
233  size_t sampleSize_;
234  using Buckets = std::vector<size_t>;
235  T unit_;
236  Buckets buckets_;
237 };
238 
239 } //stathistogram_detail
240 
241 template <typename T, bool DETAILED = true>
243 }}
244 
collect sample values and keep histogram for top percentages
Definition: StatHistogram.hpp:39
Definition: Base.hpp:12