Introduce two new sysctls:
net.inet.tcp.rexmit_min (default 3 ticks equiv) This sysctl is the retransmit timer RTO minimum, specified in milliseconds. This value is designed for algorithmic stability only. net.inet.tcp.rexmit_slop (default 200ms) This sysctl is the retransmit timer RTO slop, which is added to every retransmit timeout and is designed to handle protocol stack overheads and delayed ack issues. Note that the *original* code applied a 1-second RTO minimum but never applied real slop to the RTO calculation, so any RTO calculation over one second would have no slop and thus not account for protocol stack overheads (TCP timestamps are not a measure of protocol turnaround!). Essentially, the original code made the RTO calculation almost completely irrelevant. Please note that the 200ms slop is debatable. This commit is not meant to be a line in the sand, and if the community winds up deciding that increasing it is the correct solution then it's easy to do. Note that larger values will destroy performance on lossy networks while smaller values may result in a greater number of unnecessary retransmits.
This commit is contained in:
parent
f4359ccbbc
commit
701bec5a38
@ -196,6 +196,8 @@ tcp_init()
|
||||
tcp_keepintvl = TCPTV_KEEPINTVL;
|
||||
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
|
||||
tcp_msl = TCPTV_MSL;
|
||||
tcp_rexmit_min = TCPTV_MIN;
|
||||
tcp_rexmit_slop = TCPTV_CPU_VAR;
|
||||
|
||||
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
|
||||
LIST_INIT(&tcb);
|
||||
@ -542,7 +544,7 @@ tcp_newtcpcb(inp)
|
||||
*/
|
||||
tp->t_srtt = TCPTV_SRTTBASE;
|
||||
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
|
||||
tp->t_rttmin = TCPTV_MIN;
|
||||
tp->t_rttmin = tcp_rexmit_min;
|
||||
tp->t_rxtcur = TCPTV_RTOBASE;
|
||||
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
|
||||
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
|
||||
|
@ -109,6 +109,14 @@ int tcp_msl;
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
|
||||
&tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
|
||||
|
||||
int tcp_rexmit_min;
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
|
||||
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
|
||||
|
||||
int tcp_rexmit_slop;
|
||||
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
|
||||
&tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
|
||||
|
||||
static int always_keepalive = 1;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
|
||||
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
|
||||
|
@ -92,11 +92,26 @@
|
||||
|
||||
/*
|
||||
* Minimum retransmit timer is 3 ticks, for algorithmic stability.
|
||||
* The maximum is 64 seconds. The prior minimum of 1*hz (1 second) badly
|
||||
* breaks throughput on any networks faster than a modem that has minor
|
||||
* (e.g. 1%) packet loss.
|
||||
* TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
|
||||
* the expected worst-case processing variances by the kernels
|
||||
* representing the end points. Such variances do not always show
|
||||
* up in the srtt because the timestamp is often calculated at
|
||||
* the interface rather than at the TCP layer. This value is
|
||||
* typically 50ms. However, it is also possible that delayed
|
||||
* acks (typically 100ms) could create issues so we set the slop
|
||||
* to 200ms to try to cover it. Note that, properly speaking,
|
||||
* delayed-acks should not create a major issue for interactive
|
||||
* environments which 'P'ush the last segment, at least as
|
||||
* long as implementations do the required 'at least one ack
|
||||
* for every two packets' for the non-interactive streaming case.
|
||||
* (maybe the RTO calculation should use 2*RTT instead of RTT
|
||||
* to handle the ack-every-other-packet case).
|
||||
*
|
||||
* The prior minimum of 1*hz (1 second) badly breaks throughput on any
|
||||
* networks faster than a modem that has minor (e.g. 1%) packet loss.
|
||||
*/
|
||||
#define TCPTV_MIN ( 3 ) /* minimum allowable value */
|
||||
#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
|
||||
#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
|
||||
|
||||
#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */
|
||||
@ -116,7 +131,7 @@ static char *tcptimers[] =
|
||||
* Force a time value to be in a certain range.
|
||||
*/
|
||||
#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
|
||||
(tv) = (value); \
|
||||
(tv) = (value) + tcp_rexmit_slop; \
|
||||
if ((u_long)(tv) < (u_long)(tvmin)) \
|
||||
(tv) = (tvmin); \
|
||||
else if ((u_long)(tv) > (u_long)(tvmax)) \
|
||||
@ -130,6 +145,8 @@ extern int tcp_keepintvl; /* time between keepalive probes */
|
||||
extern int tcp_maxidle; /* time to drop after starting probes */
|
||||
extern int tcp_delacktime; /* time before sending a delayed ACK */
|
||||
extern int tcp_maxpersistidle;
|
||||
extern int tcp_rexmit_min;
|
||||
extern int tcp_rexmit_slop;
|
||||
extern int tcp_msl;
|
||||
extern int tcp_ttl; /* time to live for TCP segs */
|
||||
extern int tcp_backoff[];
|
||||
|
@ -196,6 +196,8 @@ tcp_init()
|
||||
tcp_keepintvl = TCPTV_KEEPINTVL;
|
||||
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
|
||||
tcp_msl = TCPTV_MSL;
|
||||
tcp_rexmit_min = TCPTV_MIN;
|
||||
tcp_rexmit_slop = TCPTV_CPU_VAR;
|
||||
|
||||
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
|
||||
LIST_INIT(&tcb);
|
||||
@ -542,7 +544,7 @@ tcp_newtcpcb(inp)
|
||||
*/
|
||||
tp->t_srtt = TCPTV_SRTTBASE;
|
||||
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
|
||||
tp->t_rttmin = TCPTV_MIN;
|
||||
tp->t_rttmin = tcp_rexmit_min;
|
||||
tp->t_rxtcur = TCPTV_RTOBASE;
|
||||
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
|
||||
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
|
||||
|
Loading…
Reference in New Issue
Block a user