Introduce two new sysctls:

net.inet.tcp.rexmit_min (default 3 ticks equiv)

    This sysctl is the retransmit timer RTO minimum,
    specified in milliseconds.  This value is
    designed for algorithmic stability only.

net.inet.tcp.rexmit_slop (default 200ms)

    This sysctl is the retransmit timer RTO slop
    which is added to every retransmit timeout and
    is designed to handle protocol stack overheads
    and delayed ack issues.

Note that the *original* code applied a 1-second
RTO minimum but never applied real slop to the RTO
calculation, so any RTO calculation over one second
would have no slop and thus not account for
protocol stack overheads (TCP timestamps are not
a measure of protocol turnaround!).  Essentially,
the original code made the RTO calculation almost
completely irrelevant.

Please note that the 200ms slop is debatable.
This commit is not meant to be a line in the sand,
and if the community winds up deciding that increasing
it is the correct solution, then it's easy to do.
Note that larger values will destroy performance
on lossy networks while smaller values may result in
a greater number of unnecessary retransmits.
This commit is contained in:
Matthew Dillon 2002-07-18 19:06:12 +00:00
parent f4359ccbbc
commit 701bec5a38
4 changed files with 35 additions and 6 deletions

View File

@ -196,6 +196,8 @@ tcp_init()
tcp_keepintvl = TCPTV_KEEPINTVL;
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
tcp_msl = TCPTV_MSL;
tcp_rexmit_min = TCPTV_MIN;
tcp_rexmit_slop = TCPTV_CPU_VAR;
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
LIST_INIT(&tcb);
@ -542,7 +544,7 @@ tcp_newtcpcb(inp)
*/
tp->t_srtt = TCPTV_SRTTBASE;
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
tp->t_rttmin = TCPTV_MIN;
tp->t_rttmin = tcp_rexmit_min;
tp->t_rxtcur = TCPTV_RTOBASE;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;

View File

@ -109,6 +109,14 @@ int tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
&tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
int tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
int tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
&tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
static int always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

View File

@ -92,11 +92,26 @@
/*
* Minimum retransmit timer is 3 ticks, for algorithmic stability.
* The maximum is 64 seconds. The prior minimum of 1*hz (1 second) badly
* breaks throughput on any networks faster than a modem that have minor
* (e.g. 1%) packet loss.
* TCPT_RANGESET() will add another TCPTV_CPU_VAR to deal with
* the expected worst-case processing variances by the kernels
* representing the end points. Such variances do not always show
* up in the srtt because the timestamp is often calculated at
* the interface rather than at the TCP layer. This value is
* typically 50ms. However, it is also possible that delayed
* acks (typically 100ms) could create issues so we set the slop
* to 200ms to try to cover it. Note that, properly speaking,
* delayed-acks should not create a major issue for interactive
* environments which 'P'ush the last segment, at least as
* long as implementations do the required 'at least one ack
* for every two packets' for the non-interactive streaming case.
* (maybe the RTO calculation should use 2*RTT instead of RTT
* to handle the ack-every-other-packet case).
*
* The prior minimum of 1*hz (1 second) badly breaks throughput on any
* networks faster than a modem that have minor (e.g. 1%) packet loss.
*/
#define TCPTV_MIN ( 3 ) /* minimum allowable value */
#define TCPTV_CPU_VAR ( hz/5 ) /* cpu variance allowed (200ms) */
#define TCPTV_REXMTMAX ( 64*hz) /* max allowable REXMT value */
#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */
@ -116,7 +131,7 @@ static char *tcptimers[] =
* Force a time value to be in a certain range.
*/
#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
(tv) = (value); \
(tv) = (value) + tcp_rexmit_slop; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
else if ((u_long)(tv) > (u_long)(tvmax)) \
@ -130,6 +145,8 @@ extern int tcp_keepintvl; /* time between keepalive probes */
extern int tcp_maxidle; /* time to drop after starting probes */
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];

View File

@ -196,6 +196,8 @@ tcp_init()
tcp_keepintvl = TCPTV_KEEPINTVL;
tcp_maxpersistidle = TCPTV_KEEP_IDLE;
tcp_msl = TCPTV_MSL;
tcp_rexmit_min = TCPTV_MIN;
tcp_rexmit_slop = TCPTV_CPU_VAR;
INP_INFO_LOCK_INIT(&tcbinfo, "tcp");
LIST_INIT(&tcb);
@ -542,7 +544,7 @@ tcp_newtcpcb(inp)
*/
tp->t_srtt = TCPTV_SRTTBASE;
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
tp->t_rttmin = TCPTV_MIN;
tp->t_rttmin = tcp_rexmit_min;
tp->t_rxtcur = TCPTV_RTOBASE;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;