One of the ways to detect loss is to count duplicate acks coming back from the
other end till it reaches predetermined threshold which is 3 for us right now. Once that happens, we trigger fast-retransmit to do loss recovery. Main problem with the current implementation is that we don't honor SACK information well to detect whether an incoming ack is a dupack or not. RFC6675 has latest recommendations for that. According to it, dupack is a segment that arrives carrying a SACK block that identifies previously unknown information between snd_una and snd_max even if it carries new data, changes the advertised window, or moves the cumulative acknowledgment point. With the prevalence of Selective ACK (SACK) these days, improper handling can lead to delayed loss recovery. With the fix, new behavior looks like following: 0) th_ack < snd_una --> ignore Old acks are ignored. 1) th_ack == snd_una, !sack_changed --> ignore Acks with SACK enabled but without any new SACK info in them are ignored. 2) th_ack == snd_una, window == old_window --> increment Increment on a good dupack. 3) th_ack == snd_una, window != old_window, sack_changed --> increment When SACK enabled, it's okay to have advertized window changed if the ack has new SACK info. 4) th_ack > snd_una --> reset to 0 Reset to 0 when left edge moves. 5) th_ack > snd_una, sack_changed --> increment Increment if left edge moves but there is new SACK info. Here, sack_changed is the indicator that incoming ack has previously unknown SACK info in it. Note: This fix is not fully compliant to RFC6675. That may require a few changes to current implementation in order to keep per-sackhole dupack counter and change to the way we mark/handle sack holes. PR: 203663 Reviewed by: jtl MFC after: 3 weeks Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D4225
This commit is contained in:
parent
b7df39cc12
commit
021eaf7996
@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
|
||||
int ti_locked)
|
||||
{
|
||||
int thflags, acked, ourfinisacked, needoutput = 0;
|
||||
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
|
||||
int rstreason, todrop, win;
|
||||
u_long tiwin;
|
||||
char *s;
|
||||
@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
thflags = th->th_flags;
|
||||
inc = &tp->t_inpcb->inp_inc;
|
||||
tp->sackhint.last_sack_ack = 0;
|
||||
sack_changed = 0;
|
||||
|
||||
/*
|
||||
* If this is either a state-changing packet or current state isn't
|
||||
@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
if ((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
((to.to_flags & TOF_SACK) ||
|
||||
!TAILQ_EMPTY(&tp->snd_holes)))
|
||||
tcp_sack_doack(tp, &to, th->th_ack);
|
||||
sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
|
||||
else
|
||||
/*
|
||||
* Reset the value so that previous (valid) value
|
||||
@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
hhook_run_tcp_est_in(tp, th, &to);
|
||||
|
||||
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
|
||||
if (tlen == 0 && tiwin == tp->snd_wnd) {
|
||||
if (tlen == 0 &&
|
||||
(tiwin == tp->snd_wnd ||
|
||||
(tp->t_flags & TF_SACK_PERMIT))) {
|
||||
/*
|
||||
* If this is the first time we've seen a
|
||||
* FIN from the remote, this is not a
|
||||
@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
* When using TCP ECN, notify the peer that
|
||||
* we reduced the cwnd.
|
||||
*/
|
||||
if (!tcp_timer_active(tp, TT_REXMT) ||
|
||||
th->th_ack != tp->snd_una)
|
||||
/*
|
||||
* Following 2 kinds of acks should not affect
|
||||
* dupack counting:
|
||||
* 1) Old acks
|
||||
* 2) Acks with SACK but without any new SACK
|
||||
* information in them. These could result from
|
||||
* any anomaly in the network like a switch
|
||||
* duplicating packets or a possible DoS attack.
|
||||
*/
|
||||
if (th->th_ack != tp->snd_una ||
|
||||
((tp->t_flags & TF_SACK_PERMIT) &&
|
||||
!sack_changed))
|
||||
break;
|
||||
else if (!tcp_timer_active(tp, TT_REXMT))
|
||||
tp->t_dupacks = 0;
|
||||
else if (++tp->t_dupacks > tcprexmtthresh ||
|
||||
IN_FASTRECOVERY(tp->t_flags)) {
|
||||
@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
tp->snd_cwnd = oldcwnd;
|
||||
goto drop;
|
||||
}
|
||||
} else
|
||||
tp->t_dupacks = 0;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* This ack is advancing the left edge, reset the
|
||||
* counter.
|
||||
*/
|
||||
tp->t_dupacks = 0;
|
||||
/*
|
||||
* If this ack also has new SACK info, increment the
|
||||
* counter as per rfc6675.
|
||||
*/
|
||||
if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
|
||||
tp->t_dupacks++;
|
||||
}
|
||||
|
||||
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
|
||||
@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
} else
|
||||
cc_post_recovery(tp, th);
|
||||
}
|
||||
tp->t_dupacks = 0;
|
||||
/*
|
||||
* If we reach this point, ACK is not a duplicate,
|
||||
* i.e., it ACKs something we sent.
|
||||
|
@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
|
||||
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
|
||||
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
|
||||
* the sequence space).
|
||||
* Returns 1 if incoming ACK has previously unknown SACK information,
|
||||
* 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
|
||||
* to that (i.e. left edge moving) would also be considered a change in SACK
|
||||
* information which is slightly different than rfc6675.
|
||||
*/
|
||||
void
|
||||
int
|
||||
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
{
|
||||
struct sackhole *cur, *temp;
|
||||
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
|
||||
int i, j, num_sack_blks;
|
||||
int i, j, num_sack_blks, sack_changed;
|
||||
|
||||
INP_WLOCK_ASSERT(tp->t_inpcb);
|
||||
|
||||
num_sack_blks = 0;
|
||||
sack_changed = 0;
|
||||
/*
|
||||
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
|
||||
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
|
||||
@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
* received.
|
||||
*/
|
||||
if (num_sack_blks == 0)
|
||||
return;
|
||||
return (sack_changed);
|
||||
|
||||
/*
|
||||
* Sort the SACK blocks so we can update the scoreboard with just one
|
||||
@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
tp->snd_fack = sblkp->end;
|
||||
/* Go to the previous sack block. */
|
||||
sblkp--;
|
||||
sack_changed = 1;
|
||||
} else {
|
||||
/*
|
||||
* We failed to add a new hole based on the current
|
||||
@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
SEQ_LT(tp->snd_fack, sblkp->end))
|
||||
tp->snd_fack = sblkp->end;
|
||||
}
|
||||
} else if (SEQ_LT(tp->snd_fack, sblkp->end))
|
||||
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
|
||||
/* fack is advanced. */
|
||||
tp->snd_fack = sblkp->end;
|
||||
sack_changed = 1;
|
||||
}
|
||||
/* We must have at least one SACK hole in scoreboard. */
|
||||
KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
|
||||
("SACK scoreboard must not be empty"));
|
||||
@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
|
||||
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
|
||||
("sackhint bytes rtx >= 0"));
|
||||
sack_changed = 1;
|
||||
if (SEQ_LEQ(sblkp->start, cur->start)) {
|
||||
/* Data acks at least the beginning of hole. */
|
||||
if (SEQ_GEQ(sblkp->end, cur->end)) {
|
||||
@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
|
||||
else
|
||||
sblkp--;
|
||||
}
|
||||
return (sack_changed);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -741,7 +741,7 @@ void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
|
||||
extern struct pr_usrreqs tcp_usrreqs;
|
||||
tcp_seq tcp_new_isn(struct tcpcb *);
|
||||
|
||||
void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
|
||||
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
|
||||
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
|
||||
void tcp_clean_sackreport(struct tcpcb *tp);
|
||||
void tcp_sack_adjust(struct tcpcb *tp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user