One of the ways to detect loss is to count duplicate acks coming back from the

other end till it reaches predetermined threshold which is 3 for us right now.
Once that happens, we trigger fast-retransmit to do loss recovery.

Main problem with the current implementation is that we don't honor SACK
information well to detect whether an incoming ack is a dupack or not. RFC6675
has latest recommendations for that. According to it, dupack is a segment that
arrives carrying a SACK block that identifies previously unknown information
between snd_una and snd_max even if it carries new data, changes the advertised
window, or moves the cumulative acknowledgment point.

With the prevalence of Selective ACK (SACK) these days, improper handling can
lead to delayed loss recovery.

With the fix, new behavior looks like following:

0) th_ack < snd_una --> ignore
Old acks are ignored.
1) th_ack == snd_una, !sack_changed --> ignore
Acks with SACK enabled but without any new SACK info in them are ignored.
2) th_ack == snd_una, window == old_window --> increment
Increment on a good dupack.
3) th_ack == snd_una, window != old_window, sack_changed --> increment
When SACK enabled, it's okay to have advertized window changed if the ack has
new SACK info.
4) th_ack > snd_una --> reset to 0
Reset to 0 when left edge moves.
5) th_ack > snd_una, sack_changed --> increment
Increment if left edge moves but there is new SACK info.

Here, sack_changed is the indicator that incoming ack has previously unknown
SACK info in it.

Note: This fix is not fully compliant to RFC6675. That may require a few
changes to current implementation in order to keep per-sackhole dupack counter
and change to the way we mark/handle sack holes.

PR:			203663
Reviewed by:		jtl
MFC after:		3 weeks
Sponsored by:		Limelight Networks
Differential Revision:	https://reviews.freebsd.org/D4225
This commit is contained in:
Hiren Panchasara 2015-12-08 21:21:48 +00:00
parent b7df39cc12
commit 021eaf7996
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=292003
3 changed files with 48 additions and 13 deletions

View File

@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
int ti_locked)
{
int thflags, acked, ourfinisacked, needoutput = 0;
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
int rstreason, todrop, win;
u_long tiwin;
char *s;
@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
thflags = th->th_flags;
inc = &tp->t_inpcb->inp_inc;
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
/*
* If this is either a state-changing packet or current state isn't
@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
!TAILQ_EMPTY(&tp->snd_holes)))
tcp_sack_doack(tp, &to, th->th_ack);
sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
else
/*
* Reset the value so that previous (valid) value
@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
hhook_run_tcp_est_in(tp, th, &to);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
if (tlen == 0 &&
(tiwin == tp->snd_wnd ||
(tp->t_flags & TF_SACK_PERMIT))) {
/*
* If this is the first time we've seen a
* FIN from the remote, this is not a
@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
* When using TCP ECN, notify the peer that
* we reduced the cwnd.
*/
if (!tcp_timer_active(tp, TT_REXMT) ||
th->th_ack != tp->snd_una)
/*
* Following 2 kinds of acks should not affect
* dupack counting:
* 1) Old acks
* 2) Acks with SACK but without any new SACK
* information in them. These could result from
* any anomaly in the network like a switch
* duplicating packets or a possible DoS attack.
*/
if (th->th_ack != tp->snd_una ||
((tp->t_flags & TF_SACK_PERMIT) &&
!sack_changed))
break;
else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
IN_FASTRECOVERY(tp->t_flags)) {
@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
tp->snd_cwnd = oldcwnd;
goto drop;
}
} else
tp->t_dupacks = 0;
}
break;
} else {
/*
* This ack is advancing the left edge, reset the
* counter.
*/
tp->t_dupacks = 0;
/*
* If this ack also has new SACK info, increment the
* counter as per rfc6675.
*/
if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
tp->t_dupacks++;
}
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else
cc_post_recovery(tp, th);
}
tp->t_dupacks = 0;
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.

View File

@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
* Returns 1 if incoming ACK has previously unknown SACK information,
* 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
* to that (i.e. left edge moving) would also be considered a change in SACK
* information which is slightly different than rfc6675.
*/
void
int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks;
int i, j, num_sack_blks, sack_changed;
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
sack_changed = 0;
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
* received.
*/
if (num_sack_blks == 0)
return;
return (sack_changed);
/*
* Sort the SACK blocks so we can update the scoreboard with just one
@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
sack_changed = 1;
} else {
/*
* We failed to add a new hole based on the current
@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
SEQ_LT(tp->snd_fack, sblkp->end))
tp->snd_fack = sblkp->end;
}
} else if (SEQ_LT(tp->snd_fack, sblkp->end))
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
/* fack is advanced. */
tp->snd_fack = sblkp->end;
sack_changed = 1;
}
/* We must have at least one SACK hole in scoreboard. */
KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
("SACK scoreboard must not be empty"));
@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
sack_changed = 1;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
else
sblkp--;
}
return (sack_changed);
}
/*

View File

@ -741,7 +741,7 @@ void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
extern struct pr_usrreqs tcp_usrreqs;
tcp_seq tcp_new_isn(struct tcpcb *);
void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);