* Improve input validation of sysctl parameters for DCTCP.

* Initialize the alpha parameter to a conservative value (like Linux)
* Improve handling of arithmetic.
* Improve man-page

Obtained from:		Richard Scheffenegger
MFC after:		1 week
Differential Revision:	https://reviews.freebsd.org/D20549
This commit is contained in:
Michael Tuexen 2019-07-29 08:50:35 +00:00
parent 8de2d8c009
commit 333ba164d6
2 changed files with 46 additions and 36 deletions

View File

@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd January 12, 2015
.Dd July 29, 2019
.Dt CC_DCTCP 4
.Os
.Sh NAME
@ -60,7 +60,7 @@ In addition, when classic ECN is used as sender and DCTCP is used as
receiver, DCTCP avoids mirroring back ACKs only when the CWR flag is
set in the incoming packet.
.Pp
The other specifications are based on the paper and Internet Draft referenced
The other specifications are based on the paper and the RFC referenced
in the
.Sx SEE ALSO
section below.
@ -70,16 +70,27 @@ The algorithm exposes the following tunable variables in the
branch of the
.Xr sysctl 3
MIB:
.Bl -tag -width ".Va alpha"
.Bl -tag -width ".Va slowstart"
.It Va alpha
An initial estimator of the congestion on the link.
Default is 0.
.It Va dctcp_shift_g
An estimation gain in the alpha calculation.
Default is 16.
The initial value to estimate the congestion on the link.
The valid range is from 0 to 1024, where 1024 reduces the congestion
window to half, if a CE is observed in the first window and
.Va alpha
could not yet adjust to the congestion level on that path.
Default is 1024.
.It Va shift_g
An estimation gain in the
.Va alpha
calculation.
This influences the responsiveness when adjusting alpha
to the most recent observed window.
The valid range is from 0 to 10; the default is 4, resulting in an effective
gain of 1 / ( 2 ^
.Va shift_g
), or 1/16th.
.It Va slowstart
A trigger to halve congestion window after slow start.
Default does nothing to halve window.
A flag indicating whether the congestion window should be reduced by one half after slow start.
Valid settings are 0 and 1; the default is 0.
.El
.Sh SEE ALSO
.Xr cc_chd 4 ,
@ -108,10 +119,12 @@ Default does nothing to halve window.
.Re
.Rs
.%A "Stephen Bensley"
.%A "Lars Eggert"
.%A "Dave Thaler"
.%T "Microsoft's Datacenter TCP (DCTCP): TCP Congestion Control for Datacenters"
.%U "http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-01"
.%A "Praveen Balasubramanian"
.%A "Lars Eggert"
.%A "Glenn Judd"
.%T "Data Center TCP (DCTCP): TCP Congestion Control for Data Centers"
.%U "https://tools.ietf.org/html/rfc8257"
.Re
.Sh HISTORY
The

View File

@ -56,8 +56,9 @@ __FBSDID("$FreeBSD$");
#include <netinet/cc/cc.h>
#include <netinet/cc/cc_module.h>
#define MAX_ALPHA_VALUE 1024
VNET_DEFINE_STATIC(uint32_t, dctcp_alpha) = 0;
#define DCTCP_SHIFT 10
#define MAX_ALPHA_VALUE (1<<DCTCP_SHIFT)
VNET_DEFINE_STATIC(uint32_t, dctcp_alpha) = MAX_ALPHA_VALUE;
#define V_dctcp_alpha VNET(dctcp_alpha)
VNET_DEFINE_STATIC(uint32_t, dctcp_shift_g) = 4;
#define V_dctcp_shift_g VNET(dctcp_shift_g)
@ -65,14 +66,14 @@ VNET_DEFINE_STATIC(uint32_t, dctcp_slowstart) = 0;
#define V_dctcp_slowstart VNET(dctcp_slowstart)
struct dctcp {
int bytes_ecn; /* # of marked bytes during a RTT */
int bytes_total; /* # of acked bytes during a RTT */
int alpha; /* the fraction of marked bytes */
int ce_prev; /* CE state of the last segment */
int save_sndnxt; /* end sequence number of the current window */
int ece_curr; /* ECE flag in this segment */
int ece_prev; /* ECE flag in the last segment */
uint32_t num_cong_events; /* # of congestion events */
uint32_t bytes_ecn; /* # of marked bytes during a RTT */
uint32_t bytes_total; /* # of acked bytes during a RTT */
int alpha; /* the fraction of marked bytes */
int ce_prev; /* CE state of the last segment */
tcp_seq save_sndnxt; /* end sequence number of the current window */
int ece_curr; /* ECE flag in this segment */
int ece_prev; /* ECE flag in the last segment */
uint32_t num_cong_events; /* # of congestion events */
};
static MALLOC_DEFINE(M_dctcp, "dctcp data",
@ -369,18 +370,18 @@ dctcp_update_alpha(struct cc_var *ccv)
dctcp_data->bytes_total = max(dctcp_data->bytes_total, 1);
/*
* Update alpha: alpha = (1 - g) * alpha + g * F.
* Update alpha: alpha = (1 - g) * alpha + g * M.
* Here:
* g is weight factor
* recommended to be set to 1/16
* small g = slow convergence between competitive DCTCP flows
* large g = impacts low utilization of bandwidth at switches
* F is fraction of marked segments in last RTT
* M is fraction of marked segments in last RTT
* updated every RTT
* Alpha must be clamped to the range 0 - MAX_ALPHA_VALUE.
*/
dctcp_data->alpha = min(alpha_prev - (alpha_prev >> V_dctcp_shift_g) +
(dctcp_data->bytes_ecn << (10 - V_dctcp_shift_g)) /
dctcp_data->alpha = ulmin(alpha_prev - (alpha_prev >> V_dctcp_shift_g) +
((uint64_t)dctcp_data->bytes_ecn << (DCTCP_SHIFT - V_dctcp_shift_g)) /
dctcp_data->bytes_total, MAX_ALPHA_VALUE);
/* Initialize internal parameters for next alpha calculation */
@ -398,14 +399,10 @@ dctcp_alpha_handler(SYSCTL_HANDLER_ARGS)
new = V_dctcp_alpha;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr != NULL) {
if (new > 1)
if (new > MAX_ALPHA_VALUE)
error = EINVAL;
else {
if (new > MAX_ALPHA_VALUE)
V_dctcp_alpha = MAX_ALPHA_VALUE;
else
V_dctcp_alpha = new;
}
else
V_dctcp_alpha = new;
}
return (error);
@ -420,7 +417,7 @@ dctcp_shift_g_handler(SYSCTL_HANDLER_ARGS)
new = V_dctcp_shift_g;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr != NULL) {
if (new > 1)
if (new > DCTCP_SHIFT)
error = EINVAL;
else
V_dctcp_shift_g = new;
@ -454,7 +451,7 @@ SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, dctcp, CTLFLAG_RW, NULL,
SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, alpha,
CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(dctcp_alpha), 0,
&dctcp_alpha_handler,
"IU", "dctcp alpha parameter");
"IU", "dctcp alpha parameter at start of session");
SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, shift_g,
CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(dctcp_shift_g), 4,