This patch improves the DSACK handling to conform with RFC 2883.

The lowest SACK block is used when multiple blocks would be eligible as
DSACK blocks; SACK blocks get reordered as needed, while the ordering of
SACK blocks not relevant in the DSACK context is maintained.

Reviewed by:		rrs@, tuexen@
Obtained from:		Richard Scheffenegger
MFC after:		1 week
Differential Revision:	https://reviews.freebsd.org/D21038
Michael Tuexen 2019-09-02 19:04:02 +00:00
parent 385a0efed3
commit fe5dee73f7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=351725
5 changed files with 154 additions and 15 deletions
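
For orientation before the per-file diffs: a minimal userland sketch (not kernel code) of the RFC 2883 rule this change implements. The duplicate range is reported in the first SACK block and, when it is a subset of a larger still-valid SACK block, that enclosing block immediately follows. The types and values below are illustrative stand-ins for their kernel namesakes.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;
struct sackblk { tcp_seq start, end; };

int
main(void)
{
	struct sackblk blks[2];
	tcp_seq dup_start = 1000, dup_end = 1500;	/* duplicate segment */
	tcp_seq sack_start = 1000, sack_end = 3000;	/* enclosing SACK block */

	blks[0].start = dup_start;	/* DSACK block always goes first */
	blks[0].end = dup_end;
	blks[1].start = sack_start;	/* enclosing block follows, per RFC 2883 */
	blks[1].end = sack_end;

	for (int i = 0; i < 2; i++)
		printf("block %d: %u..%u\n", i,
		    (unsigned)blks[i].start, (unsigned)blks[i].end);
	return (0);
}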


@ -1486,7 +1486,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
	} else {
		tp->rfbuf_cnt += tlen;	/* add up */
	}
	return (newsize);
}


@ -149,6 +149,108 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
    &VNET_NAME(tcp_sack_globalholes), 0,
    "Global number of TCP SACK holes currently allocated");

/*
 * This function will find overlaps with the currently stored sackblocks
 * and add any overlap as a dsack block upfront
 */
void
tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
{
	struct sackblk head_blk, mid_blk, saved_blks[MAX_SACK_BLKS];
	int i, j, n, identical;
	tcp_seq start, end;

	INP_WLOCK_ASSERT(tp->t_inpcb);

	KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));

	if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
		log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n",
		    rcv_start, rcv_end, tp->rcv_nxt);
	}

	if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
	    ((rcv_end == tp->rcv_nxt) &&
	     (tp->rcv_numsacks > 0) &&
	     (tp->sackblks[0].end == tp->rcv_nxt))) {
		saved_blks[0].start = rcv_start;
		saved_blks[0].end = rcv_end;
	} else {
		saved_blks[0].start = saved_blks[0].end = 0;
	}

	head_blk.start = head_blk.end = 0;
	mid_blk.start = rcv_start;
	mid_blk.end = rcv_end;
	identical = 0;

	for (i = 0; i < tp->rcv_numsacks; i++) {
		start = tp->sackblks[i].start;
		end = tp->sackblks[i].end;
		if (SEQ_LT(rcv_end, start)) {
			/* pkt left to sack blk */
			continue;
		}
		if (SEQ_GT(rcv_start, end)) {
			/* pkt right to sack blk */
			continue;
		}
		if (SEQ_GT(tp->rcv_nxt, end)) {
			if ((SEQ_MAX(rcv_start, start) !=
			     SEQ_MIN(rcv_end, end)) &&
			    (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) ||
			     (head_blk.start == head_blk.end))) {
				head_blk.start = SEQ_MAX(rcv_start, start);
				head_blk.end = SEQ_MIN(rcv_end, end);
			}
			continue;
		}
		if (((head_blk.start == head_blk.end) ||
		     SEQ_LT(start, head_blk.start)) &&
		    (SEQ_GT(end, rcv_start) &&
		     SEQ_LEQ(start, rcv_end))) {
			head_blk.start = start;
			head_blk.end = end;
		}
		mid_blk.start = SEQ_MIN(mid_blk.start, start);
		mid_blk.end = SEQ_MAX(mid_blk.end, end);
		if ((mid_blk.start == start) &&
		    (mid_blk.end == end))
			identical = 1;
	}
	if (SEQ_LT(head_blk.start, head_blk.end)) {
		/* store overlapping range */
		saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
		saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end);
	}
	n = 1;
	/*
	 * Second, if not ACKed, store the SACK block that
	 * overlaps with the DSACK block unless it is identical
	 */
	if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
	    !((mid_blk.start == saved_blks[0].start) &&
	      (mid_blk.end == saved_blks[0].end))) ||
	    (identical == 1)) {
		saved_blks[n].start = mid_blk.start;
		saved_blks[n++].end = mid_blk.end;
	}
	for (j = 0; (j < tp->rcv_numsacks) && (j < MAX_SACK_BLKS - 1); j++) {
		if ((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
		     SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
		    (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt)))
			saved_blks[n++] = tp->sackblks[j];
	}
	j = 0;
	for (i = 0; i < n; i++) {
		/* we can end up with a stale initial entry */
		if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
			tp->sackblks[j++] = saved_blks[i];
		}
	}
	tp->rcv_numsacks = j;
}

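
To see the "lowest block wins" selection above in isolation: a hedged userland sketch of the head_blk step, with the SEQ_* comparison macros re-declared as in netinet/tcp_seq.h so the snippet is self-contained. The sample blocks are made up.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;
/* Wraparound-safe comparisons, as in netinet/tcp_seq.h. */
#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)
#define SEQ_MIN(a, b)	(SEQ_LT(a, b) ? (a) : (b))
#define SEQ_MAX(a, b)	(SEQ_GT(a, b) ? (a) : (b))

struct sackblk { tcp_seq start, end; };

int
main(void)
{
	/* Two already-ACKed blocks overlapping a duplicate 100..400 segment. */
	struct sackblk blks[] = { { 300, 400 }, { 100, 200 } };
	tcp_seq rcv_start = 100, rcv_end = 400;
	struct sackblk head = { 0, 0 };

	for (int i = 0; i < 2; i++) {
		tcp_seq s = SEQ_MAX(rcv_start, blks[i].start);
		tcp_seq e = SEQ_MIN(rcv_end, blks[i].end);

		/* Keep the overlap with the lowest start, as above. */
		if ((s != e) &&
		    (SEQ_GT(head.start, s) || (head.start == head.end)))
			head = (struct sackblk){ s, e };
	}
	printf("DSACK block: %u..%u\n",
	    (unsigned)head.start, (unsigned)head.end);	/* 100..200 */
	return (0);
}
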
/*
* This function is called upon receipt of new valid data (while not in
* header prediction mode), and it updates the ordered list of sacks.
@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
	/* Check arguments. */
	KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));

	if ((rcv_start == rcv_end) &&
	    (tp->rcv_numsacks >= 1) &&
	    (rcv_end == tp->sackblks[0].end)) {
		/* retaining DSACK block below rcv_nxt (todrop) */
		head_blk = tp->sackblks[0];
	} else {
		/* SACK block for the received segment. */
		head_blk.start = rcv_start;
		head_blk.end = rcv_end;
	}

	/*
	 * Merge updated SACK blocks into head_blk, and save unchanged SACK
@ -267,6 +376,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
		if (num_saved >= MAX_SACK_BLKS)
			num_saved--;
	}
	if ((rcv_start == rcv_end) &&
	    (rcv_start == tp->sackblks[0].end)) {
		num_head = 1;
	}
	if (num_saved > 0) {
		/*
		 * Copy the saved SACK blocks back.
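
The head_blk retention added above can be exercised outside the kernel, too. A hedged sketch under local stand-ins (fake_tp is not the kernel tcpcb): a zero-length update whose edge matches the end of the stored first block keeps that DSACK block instead of installing an empty one.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;
struct sackblk { tcp_seq start, end; };
/* Tiny stand-in for the fields of struct tcpcb used here. */
struct fake_tp { struct sackblk sackblks[4]; int rcv_numsacks; };

static struct sackblk
pick_head_blk(struct fake_tp *tp, tcp_seq rcv_start, tcp_seq rcv_end)
{
	struct sackblk head_blk;

	if ((rcv_start == rcv_end) &&
	    (tp->rcv_numsacks >= 1) &&
	    (rcv_end == tp->sackblks[0].end)) {
		/* retaining DSACK block below rcv_nxt (todrop) */
		head_blk = tp->sackblks[0];
	} else {
		/* SACK block for the received segment. */
		head_blk.start = rcv_start;
		head_blk.end = rcv_end;
	}
	return (head_blk);
}

int
main(void)
{
	struct fake_tp tp = { .sackblks = { { 500, 700 } }, .rcv_numsacks = 1 };
	struct sackblk hb = pick_head_blk(&tp, 700, 700);	/* zero length */

	printf("head_blk: %u..%u\n",
	    (unsigned)hb.start, (unsigned)hb.end);	/* 500..700 kept */
	return (0);
}
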


@ -1783,6 +1783,14 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tc
		TCPSTAT_INC(tcps_rcvpartduppack);
		TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
	}
	if (tp->t_flags & TF_SACK_PERMIT) {
		/*
		 * Record the left, to-be-dropped edge of data
		 * here, for use as a DSACK block further down.
		 */
		tcp_update_sack_list(tp, th->th_seq,
		    th->th_seq + todrop);
	}
	*drop_hdrlen += todrop;	/* drop from the top afterwards */
	th->th_seq += todrop;
	tlen -= todrop;
@ -4900,7 +4908,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
	    (TCPS_HAVEESTABLISHED(tp->t_state) ||
	     tfo_syn)) {
		if (DELAY_ACK(tp, tlen) || tfo_syn) {
			rack_timer_cancel(tp, rack,
			    rack->r_ctl.rc_rcvtime, __LINE__);
			tp->t_flags |= TF_DELACK;
		} else {
			rack->r_wanted_output++;
@ -4934,18 +4943,29 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
			 * DSACK actually handled in the fastpath
			 * above.
			 */
			tcp_update_sack_list(tp, save_start,
			    save_start + save_tlen);
		} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
			if ((tp->rcv_numsacks >= 1) &&
			    (tp->sackblks[0].end == save_start)) {
				/* partial overlap, recorded at todrop above */
				tcp_update_sack_list(tp, tp->sackblks[0].start,
				    tp->sackblks[0].end);
			} else {
				tcp_update_dsack_list(tp, save_start,
				    save_start + save_tlen);
			}
		} else if ((tlen > 0) && (tlen >= save_tlen)) {
			/* Update of sackblks. */
			tcp_update_dsack_list(tp, save_start,
			    save_start + save_tlen);
		} else if (tlen > 0) {
			tcp_update_dsack_list(tp, save_start,
			    save_start + tlen);
		}
	} else {
		m_freem(m);
@ -4967,7 +4987,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
	 * now.
	 */
	if (tp->t_flags & TF_NEEDSYN) {
		rack_timer_cancel(tp, rack,
		    rack->r_ctl.rc_rcvtime, __LINE__);
		tp->t_flags |= TF_DELACK;
	} else {
		tp->t_flags |= TF_ACKNOW;
@ -4984,7 +5005,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
		tp->t_starttime = ticks;
		/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		rack_timer_cancel(tp, rack,
		    rack->r_ctl.rc_rcvtime, __LINE__);
		tcp_state_change(tp, TCPS_CLOSE_WAIT);
		break;
@ -4993,7 +5015,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
	 * acked so enter the CLOSING state.
	 */
	case TCPS_FIN_WAIT_1:
		rack_timer_cancel(tp, rack,
		    rack->r_ctl.rc_rcvtime, __LINE__);
		tcp_state_change(tp, TCPS_CLOSING);
		break;
@ -5003,7 +5026,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
	 * other standard timers.
	 */
	case TCPS_FIN_WAIT_2:
		rack_timer_cancel(tp, rack,
		    rack->r_ctl.rc_rcvtime, __LINE__);
		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
		tcp_twstart(tp);
		return (1);
@ -5012,7 +5036,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
	/*
	 * Return any desired output.
	 */
	if ((tp->t_flags & TF_ACKNOW) ||
	    (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
		rack->r_wanted_output++;
	}
	INP_WLOCK_ASSERT(tp->t_inpcb);


@ -495,7 +495,8 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, struct tcphdr *th, struct tcp
	 * DSACK - add SACK block for dropped range
	 */
	if (tp->t_flags & TF_SACK_PERMIT) {
		tcp_update_sack_list(tp, th->th_seq,
		    th->th_seq + todrop);
		/*
		 * ACK now, as the next in-sequence segment
		 * will clear the DSACK block again


@ -939,6 +939,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
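
Finally, a hedged orientation sketch (again not kernel code) of how the receive path above dispatches between the two updaters declared here: duplicate data is reported via tcp_update_dsack_list(), while new out-of-order data still goes through tcp_update_sack_list(). The stubs below merely log the calls.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t tcp_seq;

/* Userland stubs standing in for the kernel functions declared above. */
static void
tcp_update_sack_list(void *tp, tcp_seq start, tcp_seq end)
{
	printf("SACK  update: %u..%u\n", (unsigned)start, (unsigned)end);
}

static void
tcp_update_dsack_list(void *tp, tcp_seq start, tcp_seq end)
{
	printf("DSACK update: %u..%u\n", (unsigned)start, (unsigned)end);
}

int
main(void)
{
	tcp_seq save_start = 1000;
	int save_tlen = 500, tlen = 500;

	/* Segment was entirely duplicate: report the range as a DSACK. */
	if ((tlen > 0) && (tlen >= save_tlen))
		tcp_update_dsack_list(NULL, save_start,
		    save_start + save_tlen);

	/* New out-of-order data: ordinary SACK bookkeeping. */
	tcp_update_sack_list(NULL, 3000, 3500);
	return (0);
}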