From 1eb402e47af35b3980e6bd51ec462de3a3faa2c8 Mon Sep 17 00:00:00 2001
From: Lawrence Stewart <lstewart@FreeBSD.org>
Date: Fri, 2 Apr 2021 12:29:29 +1100
Subject: [PATCH] stats(3): Improve t-digest merging of samples which result in
 mu adjustment underflow.

Allow the calculation of the mu adjustment factor to underflow instead of
rejecting the VOI sample from the digest and logging an error. This trades off
some (currently unquantified) additional centroid error in exchange for better
fidelity of the distribution's density, which is the right trade-off for now,
until follow-up work to better handle and track accumulated error can be
undertaken.

Obtained from:	Netflix
MFC after:	immediately
---
 sys/kern/subr_stats.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/sys/kern/subr_stats.c b/sys/kern/subr_stats.c
index 9dd874fcbcf8..946999263898 100644
--- a/sys/kern/subr_stats.c
+++ b/sys/kern/subr_stats.c
@@ -3255,9 +3255,41 @@ stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
 		if (is32bit) {
 			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
 			error = Q_QSUBQ(&x, ctd32->mu);
+			/*
+			 * The following calculation "x / (cnt + weight)"
+			 * computes the amount by which to adjust the centroid's
+			 * mu value in order to merge in the VOI sample.
+			 *
+			 * It can underflow (Q_QDIVI() returns ERANGE) when the
+			 * user centroids' fractional precision (which is
+			 * inherited by 'x') is too low to represent the result.
+			 *
+			 * A sophisticated approach to dealing with this issue
+			 * would minimise accumulation of error by tracking
+			 * underflow per centroid and making an adjustment when
+			 * a LSB's worth of underflow has accumulated.
+			 *
+			 * A simpler approach is to let the result underflow,
+			 * i.e. merge the VOI sample into the centroid without
+			 * adjusting the centroid's mu, and rely on the user to
+			 * specify their t-digest with sufficient centroid
+			 * fractional precision such that the accumulation of
+			 * error from multiple underflows is of no material
+			 * consequence to the centroid's final value of mu.
+			 *
+			 * For the moment, the latter approach is employed by
+			 * simply ignoring ERANGE here.
+			 *
+			 * XXXLAS: Per-centroid underflow tracking is likely too
+			 * onerous, but it probably makes sense to accumulate a
+			 * single underflow error variable across all centroids
+			 * and report it as part of the digest to provide
+			 * additional visibility into the digest's fidelity.
+			 */
 			error = error ? error :
 			    Q_QDIVI(&x, ctd32->cnt + weight);
-			if (error || (error = Q_QADDQ(&ctd32->mu, x))) {
+			if ((error && error != ERANGE)
+			    || (error = Q_QADDQ(&ctd32->mu, x))) {
 #ifdef DIAGNOSTIC
 				KASSERT(!error, ("%s: unexpected error %d",
 				    __func__, error));
@@ -3276,7 +3308,9 @@ stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
 			error = Q_QSUBQ(&x, ctd64->mu);
 			error = error ? error :
 			    Q_QDIVI(&x, ctd64->cnt + weight);
-			if (error || (error = Q_QADDQ(&ctd64->mu, x))) {
+			/* Refer to is32bit ERANGE discussion above. */
+			if ((error && error != ERANGE)
+			    || (error = Q_QADDQ(&ctd64->mu, x))) {
 				KASSERT(!error, ("%s: unexpected error %d",
 				    __func__, error));
 				return (error);