Fix spurious retransmit recovery on low latency networks

TCP's smoothed RTT (SRTT) can be much larger than an actual observed RTT. This can happen either because hz restricts the smallest calculable RTT to 10ms in VMs (or 1ms with the default 1000hz), or simply because SRTT recently incorporated a larger value.

If an ACK arrives before the calculated badrxtwin (now + SRTT/2, because t_srtt is stored scaled by TCP_RTT_SHIFT):
tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));

we'll erroneously reset snd_una to snd_max. If multiple segments were dropped and this happens repeatedly, the transmit rate will be limited to 1 MSS per RTO until we've retransmitted all drops.

Reported by:	rstone
Reviewed by:	hiren, transport
Approved by:	sbruno
MFC after:	1 month
Differential Revision:	https://reviews.freebsd.org/D8556
This commit is contained in:
Matt Macy 2018-05-08 02:22:34 +00:00
parent d5210708dd
commit 10d20c84ed
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=333346
3 changed files with 21 additions and 7 deletions

View File

@ -1682,6 +1682,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
to.to_tsecr -= tp->ts_offset;
if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
to.to_tsecr = 0;
else if (tp->t_flags & TF_PREVVALID &&
tp->t_badrxtwin != 0 && SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
cc_cong_signal(tp, th, CC_RTO_ERR);
}
/*
* Process options only when we get SYN/ACK back. The SYN case
@ -1794,9 +1797,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
TCPSTAT_INC(tcps_predack);
/*
* "bad retransmit" recovery.
* "bad retransmit" recovery without timestamps.
*/
if (tp->t_rxtshift == 1 &&
if ((to.to_flags & TOF_TS) == 0 &&
tp->t_rxtshift == 1 &&
tp->t_flags & TF_PREVVALID &&
(int)(ticks - tp->t_badrxtwin) < 0) {
cc_cong_signal(tp, th, CC_RTO_ERR);
@ -2787,8 +2791,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* original cwnd and ssthresh, and proceed to transmit where
* we left off.
*/
if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
(int)(ticks - tp->t_badrxtwin) < 0)
if (tp->t_rxtshift == 1 &&
tp->t_flags & TF_PREVVALID &&
tp->t_badrxtwin &&
SEQ_LT(to.to_tsecr, tp->t_badrxtwin))
cc_cong_signal(tp, th, CC_RTO_ERR);
/*

View File

@ -206,7 +206,7 @@ tcp_output(struct tcpcb *tp)
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
unsigned ipsec_optlen = 0;
#endif
int idle, sendalot;
int idle, sendalot, curticks;
int sack_rxmit, sack_bytes_rxmt;
struct sackhole *p;
int tso, mtu;
@ -808,9 +808,12 @@ tcp_output(struct tcpcb *tp)
/* Timestamps. */
if ((tp->t_flags & TF_RCVD_TSTMP) ||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
curticks = tcp_ts_getticks();
to.to_tsval = curticks + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
if (tp->t_rxtshift == 1)
tp->t_badrxtwin = curticks;
}
/* Set receive buffer autosizing timestamp. */

View File

@ -693,7 +693,12 @@ tcp_timer_rexmt(void * xtp)
tp->t_flags |= TF_WASCRECOVERY;
else
tp->t_flags &= ~TF_WASCRECOVERY;
tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
if ((tp->t_flags & TF_RCVD_TSTMP) == 0)
tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
/* In the event that we've negotiated timestamps
* badrxtwin will be set to the value that we set
* the retransmitted packet's to_tsval to by tcp_output
*/
tp->t_flags |= TF_PREVVALID;
} else
tp->t_flags &= ~TF_PREVVALID;