From 3a538f4b002d98864c3e33baca6768488e6d4278 Mon Sep 17 00:00:00 2001
From: Jacob Leverich
Date: Wed, 26 Dec 2012 17:08:00 -0700
Subject: [PATCH] Implement a log-based histogram sampler to increase fidelity
 for small samples and maximum range for large samples.

---
Reviewer notes on this revision of the patch (this section is ignored when
the patch is applied):
 - The one-diff-line-per-line layout was restored.  Indentation of the
   ConnectionStats.h context lines is reconstructed; if it does not match
   the tree exactly, apply with `git apply --ignore-whitespace`.
 - LogHistogramSampler.h: sample() no longer converts a negative or
   infinite double to size_t, minimum() returns 0 for an empty histogram
   instead of falling off the end, total() no longer shadows the member
   `sum`, read-only accessors are const, and the system includes are
   spelled out.  The blob id on its index line predates these changes.

 ConnectionStats.h     |  34 +++++++++++--
 LogHistogramSampler.h | 120 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 4 deletions(-)
 create mode 100644 LogHistogramSampler.h

diff --git a/ConnectionStats.h b/ConnectionStats.h
index 2b90920..27dfeb6 100644
--- a/ConnectionStats.h
+++ b/ConnectionStats.h
@@ -8,8 +8,10 @@
 
 #ifdef USE_ADAPTIVE_SAMPLER
 #include "AdaptiveSampler.h"
-#else
+#elif defined(USE_HISTOGRAM_SAMPLER)
 #include "HistogramSampler.h"
+#else
+#include "LogHistogramSampler.h"
 #endif
 #include "AgentStats.h"
 #include "Operation.h"
@@ -21,8 +23,10 @@ class ConnectionStats {
  ConnectionStats(bool _sampling = true) :
 #ifdef USE_ADAPTIVE_SAMPLER
    get_sampler(100000), set_sampler(100000), op_sampler(100000),
-#else
+#elif defined(USE_HISTOGRAM_SAMPLER)
    get_sampler(10000,1), set_sampler(10000,1), op_sampler(1000,1),
+#else
+   get_sampler(200), set_sampler(200), op_sampler(100),
 #endif
    rx_bytes(0), tx_bytes(0), gets(0), sets(0),
    get_misses(0), sampling(_sampling) {}
@@ -31,10 +35,14 @@ class ConnectionStats {
   AdaptiveSampler get_sampler;
   AdaptiveSampler set_sampler;
   AdaptiveSampler op_sampler;
-#else
+#elif defined(USE_HISTOGRAM_SAMPLER)
   HistogramSampler get_sampler;
   HistogramSampler set_sampler;
   HistogramSampler op_sampler;
+#else
+  LogHistogramSampler get_sampler;
+  LogHistogramSampler set_sampler;
+  LogHistogramSampler op_sampler;
 #endif
 
   uint64_t rx_bytes, tx_bytes;
@@ -160,7 +168,7 @@ class ConnectionStats {
            );
     if (newline) printf("\n");
   }
-#else
+#elif defined(USE_HISTOGRAM_SAMPLER)
   void print_stats(const char *tag, HistogramSampler &sampler,
                    bool newline = true) {
     if (sampler.total() == 0) {
@@ -170,6 +178,24 @@ class ConnectionStats {
       return;
     }
 
+    printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
+           tag, sampler.average(),
+           sampler.get_nth(0), sampler.get_nth(1), sampler.get_nth(5),
+           sampler.get_nth(10), sampler.get_nth(90),
+           sampler.get_nth(95), sampler.get_nth(99));
+
+    if (newline) printf("\n");
+  }
+#else
+  void print_stats(const char *tag, LogHistogramSampler &sampler,
+                   bool newline = true) {
+    if (sampler.total() == 0) {
+      printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
+             tag, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+      if (newline) printf("\n");
+      return;
+    }
+
     printf("%-7s %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f %7.1f",
            tag, sampler.average(),
            sampler.get_nth(0), sampler.get_nth(1), sampler.get_nth(5),
diff --git a/LogHistogramSampler.h b/LogHistogramSampler.h
new file mode 100644
index 0000000..7f218a4
--- /dev/null
+++ b/LogHistogramSampler.h
@@ -0,0 +1,120 @@
+/* -*- c++ -*- */
+#ifndef LOGHISTOGRAMSAMPLER_H
+#define LOGHISTOGRAMSAMPLER_H
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "Operation.h"
+
+// Growth factor between consecutive bin edges: bin i nominally covers
+// [_POW^i, _POW^(i+1)).
+#define _POW 1.1
+
+// Histogram whose bin edges grow geometrically (by a factor of _POW), so
+// small samples get narrow bins and large samples get wide ones.
+class LogHistogramSampler {
+public:
+  std::vector<uint64_t> bins;  // bins[i] counts the samples that fell in bin i.
+
+  double sum;  // Sum of every sampled value; numerator of average().
+
+  LogHistogramSampler() = delete;
+
+  // Allocates _bins + 1 zeroed bins.  _bins must be positive.
+  LogHistogramSampler(int _bins) : sum(0.0) {
+    assert(_bins > 0);
+
+    bins.resize(_bins + 1, 0);
+  }
+
+  // Records op.time() as one sample.
+  void sample(const Operation &op) {
+    sample(op.time());
+  }
+
+  // Records one sample; s must be >= 0.  Samples whose bin index would be
+  // negative (s < 1, including s == 0 where log() is -infinity) are counted
+  // in bin 0, and samples past the last bin edge in the last bin.
+  void sample(double s) {
+    assert(s >= 0);
+
+    sum += s;
+
+    // Clamp while the index is still a double: converting a negative or
+    // too-large double to size_t is undefined behaviour.
+    const double idx = log(s) / log(_POW);
+    const size_t last = bins.size() - 1;
+    size_t bin = 0;
+
+    if (idx >= static_cast<double>(last)) {
+      bin = last;
+    } else if (idx > 0) {
+      bin = static_cast<size_t>(idx);
+    }
+
+    bins[bin]++;
+  }
+
+  // Mean of all samples.  Divides by total(), so the result is not
+  // meaningful for an empty histogram; check total() first.
+  double average() const {
+    return sum / total();
+  }
+
+  // Approximate smallest sample: _POW^(i + 0.5) for the first non-empty
+  // bin i, or 0 when no sample has been recorded.
+  double minimum() const {
+    for (size_t i = 0; i < bins.size(); i++)
+      if (bins[i] > 0) return pow(_POW, static_cast<double>(i) + 0.5);
+
+    return 0.0;  // Empty histogram: never fall off the end without a value.
+  }
+
+  // Estimates the nth percentile (e.g. nth = 95), interpolating linearly
+  // inside the bin that contains it.  If no bin qualifies (nth >= 100 or an
+  // empty histogram) the upper edge of the last bin is returned.
+  double get_nth(double nth) const {
+    const uint64_t count = total();
+    const double target = count * nth / 100;
+    uint64_t n = 0;
+
+    for (size_t i = 0; i < bins.size(); i++) {
+      n += bins[i];
+
+      if (n > target) { // The nth is inside bins[i].
+        const double lo = pow(_POW, static_cast<double>(i));
+        const double hi = pow(_POW, static_cast<double>(i + 1));
+        const double left = target - (n - bins[i]);
+
+        return lo + left / bins[i] * (hi - lo);
+      }
+    }
+
+    return pow(_POW, static_cast<double>(bins.size()));
+  }
+
+  // Number of samples recorded so far.
+  uint64_t total() const {
+    uint64_t count = 0;  // Distinct name: must not shadow the member "sum".
+
+    for (auto c : bins) count += c;
+
+    return count;
+  }
+
+  // Merges h into this histogram; both must have the same number of bins.
+  void accumulate(const LogHistogramSampler &h) {
+    assert(bins.size() == h.bins.size());
+
+    for (size_t i = 0; i < bins.size(); i++) bins[i] += h.bins[i];
+
+    sum += h.sum;
+  }
+};
+
+#endif // LOGHISTOGRAMSAMPLER_H