Implement a log-based histogram sampler to increase fidelity for small samples and maximum range for large samples.

This commit is contained in:
Jacob Leverich 2012-12-26 17:08:00 -07:00
parent 46015b29ac
commit 3a538f4b00
2 changed files with 123 additions and 4 deletions

View File

@ -8,8 +8,10 @@
#ifdef USE_ADAPTIVE_SAMPLER
#include "AdaptiveSampler.h"
#else
#elif defined(USE_HISTOGRAM_SAMPLER)
#include "HistogramSampler.h"
#else
#include "LogHistogramSampler.h"
#endif
#include "AgentStats.h"
#include "Operation.h"
@ -21,8 +23,10 @@ class ConnectionStats {
ConnectionStats(bool _sampling = true) :
#ifdef USE_ADAPTIVE_SAMPLER
get_sampler(100000), set_sampler(100000), op_sampler(100000),
#else
#elif defined(USE_HISTOGRAM_SAMPLER)
get_sampler(10000,1), set_sampler(10000,1), op_sampler(1000,1),
#else
get_sampler(200), set_sampler(200), op_sampler(100),
#endif
rx_bytes(0), tx_bytes(0), gets(0), sets(0),
get_misses(0), sampling(_sampling) {}
@ -31,10 +35,14 @@ class ConnectionStats {
AdaptiveSampler<Operation> get_sampler;
AdaptiveSampler<Operation> set_sampler;
AdaptiveSampler<double> op_sampler;
#else
#elif defined(USE_HISTOGRAM_SAMPLER)
HistogramSampler get_sampler;
HistogramSampler set_sampler;
HistogramSampler op_sampler;
#else
LogHistogramSampler get_sampler;
LogHistogramSampler set_sampler;
LogHistogramSampler op_sampler;
#endif
uint64_t rx_bytes, tx_bytes;
@ -160,7 +168,7 @@ class ConnectionStats {
);
if (newline) printf("\n");
}
#else
#elif defined(USE_HISTOGRAM_SAMPLER)
void print_stats(const char *tag, HistogramSampler &sampler,
bool newline = true) {
if (sampler.total() == 0) {
@ -170,6 +178,24 @@ class ConnectionStats {
return;
}
printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
tag, sampler.average(),
sampler.get_nth(0), sampler.get_nth(1), sampler.get_nth(5),
sampler.get_nth(10), sampler.get_nth(90),
sampler.get_nth(95), sampler.get_nth(99));
if (newline) printf("\n");
}
#else
void print_stats(const char *tag, LogHistogramSampler &sampler,
bool newline = true) {
if (sampler.total() == 0) {
printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
tag, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
if (newline) printf("\n");
return;
}
printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
tag, sampler.average(),
sampler.get_nth(0), sampler.get_nth(1), sampler.get_nth(5),

93
LogHistogramSampler.h Normal file
View File

@ -0,0 +1,93 @@
/* -*- c++ -*- */
#ifndef LOGHISTOGRAMSAMPLER_H
#define LOGHISTOGRAMSAMPLER_H
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <vector>
#include "Operation.h"
#define _POW 1.1
// Histogram sampler whose bin edges grow geometrically by a factor of _POW.
//
// Bin i nominally covers [_POW^i, _POW^(i+1)).  Bin 0 additionally absorbs
// every sample below _POW (including zero), and the last bin absorbs every
// sample too large for the configured range.  The exact sum of all samples is
// kept separately so that average() does not suffer from binning error.
class LogHistogramSampler {
public:
  std::vector<uint64_t> bins;  // Per-bin sample counts.
  double sum;                  // Sum of the raw values passed to sample().

  LogHistogramSampler() = delete;

  // Creates a sampler with _bins + 1 bins; the extra bin is the one that
  // out-of-range samples are clamped into.  _bins must be positive.
  LogHistogramSampler(int _bins) : sum(0.0) {
    assert(_bins > 0);
    bins.resize(_bins + 1, 0);
  }

  // Records the value reported by op.time().
  void sample(const Operation &op) {
    sample(op.time());
  }

  // Records one non-negative sample s.
  void sample(double s) {
    assert(s >= 0);
    sum += s;

    // Samples below 1 have a non-positive (or -inf) logarithm.  Converting
    // such a double to size_t is undefined behavior, so route them to bin 0
    // without ever forming the index.
    size_t bin = 0;
    if (s >= 1.0) {
      const double index = log(s) / log(_POW);
      const size_t last = bins.size() - 1;
      // Compare in floating point first so that huge (or infinite) indices
      // are clamped before the conversion to an integer type.
      bin = (index >= static_cast<double>(last)) ? last
                                                 : static_cast<size_t>(index);
    }
    bins[bin]++;
  }

  // Returns the arithmetic mean of all samples, or 0 if nothing has been
  // recorded yet (instead of the NaN produced by 0/0).
  double average() const {
    const uint64_t count = total();
    if (count == 0) return 0.0;
    return sum / count;
  }

  // Returns the geometric midpoint of the lowest non-empty bin, or 0 if the
  // histogram is empty.
  double minimum() const {
    for (size_t i = 0; i < bins.size(); i++)
      if (bins[i] > 0) return pow(_POW, static_cast<double>(i) + 0.5);
    return 0.0;  // No samples: previously control fell off the end here.
  }

  // Returns an estimate of the nth percentile (nth expected in [0, 100]),
  // linearly interpolated inside the bin that contains it.  Returns 0 for an
  // empty histogram.
  double get_nth(double nth) const {
    const uint64_t count = total();
    if (count == 0) return 0.0;

    const double target = count * nth / 100;
    uint64_t seen = 0;
    size_t last_used = 0;

    for (size_t i = 0; i < bins.size(); i++) {
      if (bins[i] == 0) continue;  // Empty bins can never contain the target.
      last_used = i;
      seen += bins[i];
      if (seen > target) {  // The nth is inside bins[i].
        const double lower = pow(_POW, static_cast<double>(i));
        const double upper = pow(_POW, static_cast<double>(i + 1));
        const double left = target - (seen - bins[i]);
        return lower + left / bins[i] * (upper - lower);
      }
    }

    // Only reached when target >= count (e.g. nth == 100): report the upper
    // edge of the highest bin that actually holds samples rather than the
    // upper edge of the whole histogram.
    return pow(_POW, static_cast<double>(last_used + 1));
  }

  // Returns the number of samples recorded across all bins.
  uint64_t total() const {
    uint64_t count = 0;
    for (auto c : bins) count += c;
    return count;
  }

  // Merges the contents of h into this sampler.  Both samplers must have
  // been constructed with the same number of bins.
  void accumulate(const LogHistogramSampler &h) {
    assert(bins.size() == h.bins.size());
    for (size_t i = 0; i < bins.size(); i++) bins[i] += h.bins[i];
    sum += h.sum;
  }
};
#endif // LOGHISTOGRAMSAMPLER_H