Add TCP feature Proportional Rate Reduction (PRR) - RFC6937
PRR improves loss recovery and avoids RTOs in a wide range of scenarios (ACK thinning) compared with regular SACK loss recovery. PRR is disabled by default; enable it with net.inet.tcp.do_prr = 1. Performance may be impeded by token bucket rate policers at the bottleneck; in that case net.inet.tcp.do_prr_conservative = 1 should be enabled in addition. Submitted by: Aris Angelogiannopoulos Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D18892
This commit is contained in:
parent
34af05ead3
commit
0e1d7c25c5
@ -153,6 +153,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW,
|
||||
&VNET_NAME(drop_synfin), 0,
|
||||
"Drop TCP packets with SYN+FIN set");
|
||||
|
||||
VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
|
||||
&VNET_NAME(tcp_do_prr_conservative), 0,
|
||||
"Do conservative Proportional Rate Reduction");
|
||||
|
||||
VNET_DEFINE(int, tcp_do_prr) = 1;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
|
||||
&VNET_NAME(tcp_do_prr), 1,
|
||||
"Enable Proportional Rate Reduction per RFC 6937");
|
||||
|
||||
VNET_DEFINE(int, tcp_do_newcwv) = 0;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
|
||||
&VNET_NAME(tcp_do_newcwv), 0,
|
||||
@ -2554,7 +2564,55 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
cc_ack_received(tp, th, nsegs,
|
||||
CC_DUPACK);
|
||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
if (V_tcp_do_prr &&
|
||||
IN_FASTRECOVERY(tp->t_flags) &&
|
||||
(tp->t_flags & TF_SACK_PERMIT)) {
|
||||
long snd_cnt = 0, limit = 0;
|
||||
long del_data = 0, pipe = 0;
|
||||
/*
|
||||
* In a duplicate ACK del_data is only the
|
||||
* diff_in_sack. If no SACK is used del_data
|
||||
* will be 0. Pipe is the amount of data we
|
||||
* estimate to be in the network.
|
||||
*/
|
||||
del_data = tp->sackhint.delivered_data;
|
||||
pipe = (tp->snd_nxt - tp->snd_fack) +
|
||||
tp->sackhint.sack_bytes_rexmit;
|
||||
tp->sackhint.prr_delivered += del_data;
|
||||
if (pipe > tp->snd_ssthresh) {
|
||||
snd_cnt = (tp->sackhint.prr_delivered *
|
||||
tp->snd_ssthresh /
|
||||
tp->sackhint.recover_fs) +
|
||||
1 - tp->sackhint.sack_bytes_rexmit;
|
||||
} else {
|
||||
if (V_tcp_do_prr_conservative)
|
||||
limit = tp->sackhint.prr_delivered -
|
||||
tp->sackhint.sack_bytes_rexmit;
|
||||
else
|
||||
if ((tp->sackhint.prr_delivered -
|
||||
tp->sackhint.sack_bytes_rexmit) >
|
||||
del_data)
|
||||
limit = tp->sackhint.prr_delivered -
|
||||
tp->sackhint.sack_bytes_rexmit +
|
||||
maxseg;
|
||||
else
|
||||
limit = del_data + maxseg;
|
||||
if ((tp->snd_ssthresh - pipe) < limit)
|
||||
snd_cnt = tp->snd_ssthresh - pipe;
|
||||
else
|
||||
snd_cnt = limit;
|
||||
}
|
||||
snd_cnt = max((snd_cnt / maxseg), 0);
|
||||
/*
|
||||
* Send snd_cnt new data into the network in
|
||||
* response to this ACK. If there is going
|
||||
* to be a SACK retransmission, adjust snd_cwnd
|
||||
* accordingly.
|
||||
*/
|
||||
tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
|
||||
tp->sackhint.sack_bytes_rexmit +
|
||||
(snd_cnt * maxseg);
|
||||
} else if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
int awnd;
|
||||
|
||||
@ -2583,13 +2641,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
tcp_seq onxt = tp->snd_nxt;
|
||||
|
||||
/*
|
||||
* If we're doing sack, check to
|
||||
* see if we're already in sack
|
||||
* If we're doing sack, or prr, check
|
||||
* to see if we're already in sack
|
||||
* recovery. If we're not doing sack,
|
||||
* check to see if we're in newreno
|
||||
* recovery.
|
||||
*/
|
||||
if (tp->t_flags & TF_SACK_PERMIT) {
|
||||
if (V_tcp_do_prr ||
|
||||
(tp->t_flags & TF_SACK_PERMIT)) {
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
tp->t_dupacks = 0;
|
||||
break;
|
||||
@ -2607,6 +2666,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
CC_DUPACK);
|
||||
tcp_timer_activate(tp, TT_REXMT, 0);
|
||||
tp->t_rtttime = 0;
|
||||
if (V_tcp_do_prr) {
|
||||
/*
|
||||
* snd_ssthresh is already updated by
|
||||
* cc_cong_signal.
|
||||
*/
|
||||
tp->sackhint.prr_delivered = 0;
|
||||
tp->sackhint.sack_bytes_rexmit = 0;
|
||||
if (!(tp->sackhint.recover_fs = tp->snd_nxt - tp->snd_una))
|
||||
tp->sackhint.recover_fs = 1;
|
||||
}
|
||||
if (tp->t_flags & TF_SACK_PERMIT) {
|
||||
TCPSTAT_INC(
|
||||
tcps_sack_recovery_episode);
|
||||
@ -2713,7 +2782,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
if (IN_FASTRECOVERY(tp->t_flags)) {
|
||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
|
||||
if (tp->t_flags & TF_SACK_PERMIT)
|
||||
tcp_sack_partialack(tp, th);
|
||||
if (V_tcp_do_prr)
|
||||
tcp_prr_partialack(tp, th);
|
||||
else
|
||||
tcp_sack_partialack(tp, th);
|
||||
else
|
||||
tcp_newreno_partial_ack(tp, th);
|
||||
} else
|
||||
@ -3839,6 +3911,54 @@ tcp_mssopt(struct in_conninfo *inc)
|
||||
return (mss);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th)
|
||||
{
|
||||
long snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
|
||||
int maxseg = tcp_maxseg(tp);
|
||||
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
tcp_timer_activate(tp, TT_REXMT, 0);
|
||||
tp->t_rtttime = 0;
|
||||
/*
|
||||
* Compute the amount of data that this ACK is indicating
|
||||
* (del_data) and an estimate of how many bytes are in the
|
||||
* network.
|
||||
*/
|
||||
if (SEQ_GEQ(th->th_ack, tp->snd_una))
|
||||
del_data = BYTES_THIS_ACK(tp, th);
|
||||
del_data += tp->sackhint.delivered_data;
|
||||
pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
|
||||
tp->sackhint.prr_delivered += del_data;
|
||||
/*
|
||||
* Proportional Rate Reduction
|
||||
*/
|
||||
if (pipe > tp->snd_ssthresh)
|
||||
snd_cnt = (tp->sackhint.prr_delivered * tp->snd_ssthresh / tp->sackhint.recover_fs) -
|
||||
tp->sackhint.sack_bytes_rexmit;
|
||||
else {
|
||||
if (V_tcp_do_prr_conservative)
|
||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit;
|
||||
else
|
||||
if ((tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit) > del_data)
|
||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit + maxseg;
|
||||
else
|
||||
limit = del_data + maxseg;
|
||||
snd_cnt = min((tp->snd_ssthresh - pipe), limit);
|
||||
}
|
||||
snd_cnt = max((snd_cnt / maxseg), 0);
|
||||
/*
|
||||
* Send snd_cnt new data into the network in response to this ack.
|
||||
* If there is going to be a SACK retransmission, adjust snd_cwnd
|
||||
* accordingly.
|
||||
*/
|
||||
tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
|
||||
tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
(void) tcp_output(tp);
|
||||
}
|
||||
|
||||
/*
|
||||
* When a partial ack arrives, force the retransmission of the
|
||||
* next unacknowledged segment. Do not clear tp->t_dupacks.
|
||||
|
@ -113,8 +113,9 @@ struct sackhint {
|
||||
int32_t sacked_bytes; /* Total sacked bytes reported by the
|
||||
* receiver via sack option
|
||||
*/
|
||||
uint32_t _pad1[1]; /* TBD */
|
||||
uint64_t _pad[1]; /* TBD */
|
||||
uint32_t recover_fs; /* Flight Size at the start of Loss recovery */
|
||||
uint32_t prr_delivered; /* Total bytes delivered using PRR */
|
||||
uint32_t _pad[1]; /* TBD */
|
||||
};
|
||||
|
||||
#define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
|
||||
@ -866,6 +867,8 @@ VNET_DECLARE(int, tcp_sendspace);
|
||||
VNET_DECLARE(struct inpcbhead, tcb);
|
||||
VNET_DECLARE(struct inpcbinfo, tcbinfo);
|
||||
|
||||
#define V_tcp_do_prr VNET(tcp_do_prr)
|
||||
#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative)
|
||||
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
|
||||
#define V_drop_synfin VNET(drop_synfin)
|
||||
#define V_path_mtu_discovery VNET(path_mtu_discovery)
|
||||
@ -1051,6 +1054,7 @@ void tcp_clean_dsack_blocks(struct tcpcb *tp);
|
||||
void tcp_clean_sackreport(struct tcpcb *tp);
|
||||
void tcp_sack_adjust(struct tcpcb *tp);
|
||||
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
|
||||
void tcp_prr_partialack(struct tcpcb *, struct tcphdr *);
|
||||
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
|
||||
void tcp_free_sackholes(struct tcpcb *tp);
|
||||
int tcp_newreno(struct tcpcb *, struct tcphdr *);
|
||||
|
Loading…
Reference in New Issue
Block a user