tcp: make the maximum number of retransmissions tunable per VNET

Both Windows (TcpMaxDataRetransmissions) and Linux (tcp_retries2)
allow restricting the maximum number of consecutive timer-based
retransmissions. Add that same capability on a per-VNET basis to
FreeBSD.

Reviewed By:		cc, tuexen, #transport
Sponsored by:		NetApp, Inc.
Differential Revision:	https://reviews.freebsd.org/D40424
This commit is contained in:
Richard Scheffenegger 2023-06-06 22:56:44 +02:00
parent 84617f6fcc
commit 43b117f88f
6 changed files with 41 additions and 14 deletions

View File

@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
.Dd February 3, 2023
.Dd June 6, 2023
.Dt TCP 4
.Os
.Sh NAME
@ -843,6 +843,9 @@ Maximum size of automatic receive buffer.
Initial
.Tn TCP
receive window (buffer size).
.It Va retries
Maximum number of consecutive timer-based retransmits sent after a data
segment is lost (the default and the maximum are both 12).
.It Va rexmit_drop_options
Drop TCP options from third and later retransmitted SYN segments
of a connection.

View File

@ -1766,7 +1766,7 @@ tcp_setpersist(struct tcpcb *tp)
tt = maxunacktime;
}
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
}

View File

@ -4763,7 +4763,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
* the idle time (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
if (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
@ -4796,7 +4796,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
tp->t_flags &= ~TF_DELACK;
free(t_template, M_TEMP);
}
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
bbr_start_hpts_timer(bbr, tp, cts, 3, 0, 0);
out:
@ -4990,8 +4990,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
*/
tp->t_rxtshift++;
}
if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
if (tp->t_rxtshift > V_tcp_retries) {
tp->t_rxtshift = V_tcp_retries;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
/* XXXGL: previously t_softerror was casted to uint16_t */

View File

@ -7445,7 +7445,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
* the idle time (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
if (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
@ -7491,7 +7491,7 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
tp->t_flags &= ~TF_DELACK;
free(t_template, M_TEMP);
}
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
if (tp->t_rxtshift < V_tcp_retries)
tp->t_rxtshift++;
out:
rack_log_to_event(rack, RACK_TO_FRM_PERSIST, NULL);
@ -7783,10 +7783,10 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
*/
tp->t_rxtshift++;
}
if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
if (tp->t_rxtshift > V_tcp_retries) {
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
drop_it:
tp->t_rxtshift = TCP_MAXRXTSHIFT;
tp->t_rxtshift = V_tcp_retries;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
/* XXXGL: previously t_softerror was casted to uint16_t */
MPASS(tp->t_softerror >= 0);

View File

@ -201,6 +201,28 @@ static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
&per_cpu_timers , 0, "run tcp timers on all cpus");
static int
sysctl_net_inet_tcp_retries(SYSCTL_HANDLER_ARGS)
{
int error, new;
new = V_tcp_retries;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
if ((new < 1) || (new > TCP_MAXRXTSHIFT))
error = EINVAL;
else
V_tcp_retries = new;
}
return (error);
}
/*
 * Per-VNET limit on consecutive timer based retransmissions.  It is
 * initialized to TCP_MAXRXTSHIFT and registered read-write as the
 * "retries" node under _net_inet_tcp; accesses are routed through
 * sysctl_net_inet_tcp_retries().
 */
VNET_DEFINE(int, tcp_retries) = TCP_MAXRXTSHIFT;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, retries,
CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_retries), 0, sysctl_net_inet_tcp_retries, "I",
"maximum number of consecutive timer based retransmissions");
/*
* Map the given inp to a CPU id.
*
@ -492,7 +514,7 @@ tcp_timer_persist(struct tcpcb *tp)
* progress.
*/
progdrop = tcp_maxunacktime_check(tp);
if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
if (!progdrop)
@ -555,10 +577,10 @@ tcp_timer_rexmt(struct tcpcb *tp)
* or we've gone long enough without making progress, then drop
* the session.
*/
if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
if (++tp->t_rxtshift > V_tcp_retries || tcp_maxunacktime_check(tp)) {
if (tp->t_rxtshift > V_tcp_retries)
TCPSTAT_INC(tcps_timeoutdrop);
tp->t_rxtshift = TCP_MAXRXTSHIFT;
tp->t_rxtshift = V_tcp_retries;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);

View File

@ -1289,6 +1289,7 @@ VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
VNET_DECLARE(int, tcp_perconn_stats_enable);
#endif /* STATS */
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, tcp_retries);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
VNET_DECLARE(int, tcp_sack_maxholes);
@ -1335,6 +1336,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
#endif /* STATS */
#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_tcp_retries VNET(tcp_retries)
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes)