- Postpone SACK option processing until after PAWS checks. SACK option
  processing is now done in the ACK processing case.
- Merge tcp_sack_option() and tcp_del_sackholes() into a new function
  called tcp_sack_doack().
- Test (SEG.ACK <= SND.MAX) before processing the ACK.

Submitted by:	Noritoshi Demizu
Reviewed by:	Mohan Srinivasan, Raja Mukerji
Approved by:	re
This commit is contained in:
Paul Saab 2005-06-27 22:27:42 +00:00
parent dca9c930da
commit 5a53ca1627
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=147637
4 changed files with 71 additions and 116 deletions

View File

@ -164,8 +164,7 @@ struct inpcbhead tcb;
struct inpcbinfo tcbinfo;
struct mtx *tcbinfo_mtx;
static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *,
int, int, struct tcphdr *);
static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
@ -747,7 +746,7 @@ tcp_input(m, off0)
* present in a SYN segment. See tcp_timewait().
*/
if (thflags & TH_SYN)
tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th);
tcp_dooptions(&to, optp, optlen, 1);
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
&to, th, m, tlen))
goto findpcb;
@ -961,7 +960,7 @@ tcp_input(m, off0)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(tp, &to, optp, optlen, 1, th);
tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (so == NULL) {
@ -1082,7 +1081,7 @@ tcp_input(m, off0)
* for incoming connections is handled in tcp_syncache.
* XXX this is traditional behavior, may need to be cleaned up.
*/
tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th);
tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (thflags & TH_SYN) {
if (to.to_flags & TOF_SCALE) {
tp->t_flags |= TF_RCVD_SCALE;
@ -1104,11 +1103,6 @@ tcp_input(m, off0)
}
if (tp->sack_enable) {
/* Delete stale (cumulatively acked) SACK holes */
tcp_del_sackholes(tp, th);
}
/*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
@ -1153,7 +1147,7 @@ tcp_input(m, off0)
((!tcp_do_newreno && !tp->sack_enable &&
tp->t_dupacks < tcprexmtthresh) ||
((tcp_do_newreno || tp->sack_enable) &&
!IN_FASTRECOVERY(tp)))) {
!IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) {
KASSERT(headlocked, ("headlocked"));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
@ -1824,6 +1818,12 @@ tcp_input(m, off0)
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
if (tp->sack_enable)
tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@ -2002,10 +2002,6 @@ tcp_input(m, off0)
tp->snd_cwnd = tp->snd_ssthresh;
}
tp->t_dupacks = 0;
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@ -2560,13 +2556,11 @@ tcp_input(m, off0)
* Parse TCP options and place in tcpopt.
*/
static void
tcp_dooptions(tp, to, cp, cnt, is_syn, th)
struct tcpcb *tp;
tcp_dooptions(to, cp, cnt, is_syn)
struct tcpopt *to;
u_char *cp;
int cnt;
int is_syn;
struct tcphdr *th;
{
int opt, optlen;
@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th)
to->to_flags |= TOF_SACK;
}
break;
case TCPOPT_SACK:
if (!tp || tcp_sack_option(tp, th, cp, optlen))
if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
to->to_sacks = cp + 2;
tcpstat.tcps_sack_rcv_blocks++;
break;
default:
continue;

View File

@ -164,8 +164,7 @@ struct inpcbhead tcb;
struct inpcbinfo tcbinfo;
struct mtx *tcbinfo_mtx;
static void tcp_dooptions(struct tcpcb *, struct tcpopt *, u_char *,
int, int, struct tcphdr *);
static void tcp_dooptions(struct tcpopt *, u_char *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
@ -747,7 +746,7 @@ tcp_input(m, off0)
* present in a SYN segment. See tcp_timewait().
*/
if (thflags & TH_SYN)
tcp_dooptions((struct tcpcb *)NULL, &to, optp, optlen, 1, th);
tcp_dooptions(&to, optp, optlen, 1);
if (tcp_timewait((struct tcptw *)inp->inp_ppcb,
&to, th, m, tlen))
goto findpcb;
@ -961,7 +960,7 @@ tcp_input(m, off0)
tcp_trace(TA_INPUT, ostate, tp,
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(tp, &to, optp, optlen, 1, th);
tcp_dooptions(&to, optp, optlen, 1);
if (!syncache_add(&inc, &to, th, &so, m))
goto drop;
if (so == NULL) {
@ -1082,7 +1081,7 @@ tcp_input(m, off0)
* for incoming connections is handled in tcp_syncache.
* XXX this is traditional behavior, may need to be cleaned up.
*/
tcp_dooptions(tp, &to, optp, optlen, thflags & TH_SYN, th);
tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (thflags & TH_SYN) {
if (to.to_flags & TOF_SCALE) {
tp->t_flags |= TF_RCVD_SCALE;
@ -1104,11 +1103,6 @@ tcp_input(m, off0)
}
if (tp->sack_enable) {
/* Delete stale (cumulatively acked) SACK holes */
tcp_del_sackholes(tp, th);
}
/*
* Header prediction: check for the two common cases
* of a uni-directional data xfer. If the packet has
@ -1153,7 +1147,7 @@ tcp_input(m, off0)
((!tcp_do_newreno && !tp->sack_enable &&
tp->t_dupacks < tcprexmtthresh) ||
((tcp_do_newreno || tp->sack_enable) &&
!IN_FASTRECOVERY(tp)))) {
!IN_FASTRECOVERY(tp) && to.to_nsacks == 0))) {
KASSERT(headlocked, ("headlocked"));
INP_INFO_WUNLOCK(&tcbinfo);
headlocked = 0;
@ -1824,6 +1818,12 @@ tcp_input(m, off0)
case TCPS_LAST_ACK:
case TCPS_TIME_WAIT:
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("timewait"));
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
if (tp->sack_enable)
tcp_sack_doack(tp, &to, th->th_ack);
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
if (tlen == 0 && tiwin == tp->snd_wnd) {
tcpstat.tcps_rcvdupack++;
@ -2002,10 +2002,6 @@ tcp_input(m, off0)
tp->snd_cwnd = tp->snd_ssthresh;
}
tp->t_dupacks = 0;
if (SEQ_GT(th->th_ack, tp->snd_max)) {
tcpstat.tcps_rcvacktoomuch++;
goto dropafterack;
}
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
@ -2560,13 +2556,11 @@ tcp_input(m, off0)
* Parse TCP options and place in tcpopt.
*/
static void
tcp_dooptions(tp, to, cp, cnt, is_syn, th)
struct tcpcb *tp;
tcp_dooptions(to, cp, cnt, is_syn)
struct tcpopt *to;
u_char *cp;
int cnt;
int is_syn;
struct tcphdr *th;
{
int opt, optlen;
@ -2642,10 +2636,12 @@ tcp_dooptions(tp, to, cp, cnt, is_syn, th)
to->to_flags |= TOF_SACK;
}
break;
case TCPOPT_SACK:
if (!tp || tcp_sack_option(tp, th, cp, optlen))
if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
continue;
to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
to->to_sacks = cp + 2;
tcpstat.tcps_sack_rcv_blocks++;
break;
default:
continue;

View File

@ -373,54 +373,54 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
}
/*
* Process the TCP SACK option. Returns 1 if tcp_dooptions() should continue,
* and 0 otherwise, if the option was fine. tp->snd_holes is an ordered list
* of holes (oldest to newest, in terms of the sequence space).
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
*/
int
tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
void
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
int tmp_olen;
u_char *tmp_cp;
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK], *sblkp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks;
INP_LOCK_ASSERT(tp->t_inpcb);
if (!tp->sack_enable)
return (1);
if ((th->th_flags & TH_ACK) == 0)
return (1);
/* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
return (1);
/* If ack is outside [snd_una, snd_max], ignore the SACK options */
if (SEQ_LT(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))
return (1);
tmp_cp = cp + 2;
tmp_olen = optlen - 2;
tcpstat.tcps_sack_rcv_blocks++;
/*
* Sort the SACK blocks so we can update the scoreboard
* with just one pass. The overhead of sorting upto 4 elements
* is less than making upto 4 passes over the scoreboard.
*/
num_sack_blks = 0;
while (tmp_olen > 0) {
bcopy(tmp_cp, &sack, sizeof(sack));
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
*/
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
sack_blocks[num_sack_blks].start = tp->snd_una;
sack_blocks[num_sack_blks++].end = th_ack;
}
/*
* Append received valid SACK blocks to sack_blocks[].
*/
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack));
sack.start = ntohl(sack.start);
sack.end = ntohl(sack.end);
if (SEQ_GT(sack.end, sack.start) &&
SEQ_GT(sack.start, tp->snd_una) &&
SEQ_GT(sack.start, th->th_ack) &&
SEQ_GT(sack.start, th_ack) &&
SEQ_LEQ(sack.end, tp->snd_max))
sack_blocks[num_sack_blks++] = sack;
tmp_olen -= TCPOLEN_SACK;
tmp_cp += TCPOLEN_SACK;
}
/*
* Return if SND.UNA is not advanced and no valid SACK block
* is received.
*/
if (num_sack_blks == 0)
return 0;
/* Bubble sort */
return;
/*
* Sort the SACK blocks so we can update the scoreboard
* with just one pass. The overhead of sorting upto 4+1 elements
* is less than making upto 4+1 passes over the scoreboard.
*/
for (i = 0; i < num_sack_blks; i++) {
for (j = i + 1; j < num_sack_blks; j++) {
if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
@ -437,7 +437,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
* (from the sack blocks received) are created later below (in
* the logic that adds holes to the tail of the scoreboard).
*/
tp->snd_fack = tp->snd_una;
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
/*
* In the while-loop below, incoming SACK blocks (sack_blocks[])
* and SACK holes (snd_holes) are traversed from their tails with
@ -460,7 +460,7 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
if (temp == NULL)
return 0;
return;
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
@ -548,48 +548,11 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, u_char *cp, int optlen)
else
sblkp--;
}
return (0);
}
/*
* Delete stale (i.e, cumulatively ack'd) holes. Hole is deleted only if
* it is completely acked; otherwise, tcp_sack_option(), called from
* tcp_dooptions(), will fix up the hole.
* Free all SACK holes to clear the scoreboard.
*/
void
tcp_del_sackholes(tp, th)
struct tcpcb *tp;
struct tcphdr *th;
{
INP_LOCK_ASSERT(tp->t_inpcb);
if (tp->sack_enable && tp->t_state != TCPS_LISTEN) {
/* max because this could be an older ack just arrived */
tcp_seq lastack = SEQ_GT(th->th_ack, tp->snd_una) ?
th->th_ack : tp->snd_una;
struct sackhole *cur = TAILQ_FIRST(&tp->snd_holes);
struct sackhole *prev;
while (cur)
if (SEQ_LEQ(cur->end, lastack)) {
prev = cur;
cur = TAILQ_NEXT(cur, scblink);
tp->sackhint.sack_bytes_rexmit -=
(prev->rxmit - prev->start);
tcp_sackhole_remove(tp, prev);
} else if (SEQ_LT(cur->start, lastack)) {
if (SEQ_LT(cur->rxmit, lastack)) {
tp->sackhint.sack_bytes_rexmit -=
(cur->rxmit - cur->start);
cur->rxmit = lastack;
} else
tp->sackhint.sack_bytes_rexmit -=
(lastack - cur->start);
cur->start = lastack;
break;
} else
break;
}
}
void
tcp_free_sackholes(struct tcpcb *tp)
{

View File

@ -238,7 +238,8 @@ struct tcpopt {
u_int32_t to_tsecr;
u_int16_t to_mss;
u_int8_t to_requested_s_scale;
u_int8_t to_pad;
u_int8_t to_nsacks; /* number of SACK blocks */
u_char *to_sacks; /* pointer to the first SACK blocks */
};
#ifdef _NETINET_IN_PCB_H_
@ -578,9 +579,8 @@ extern u_long tcp_sendspace;
extern u_long tcp_recvspace;
tcp_seq tcp_new_isn(struct tcpcb *);
int tcp_sack_option(struct tcpcb *,struct tcphdr *,u_char *,int);
void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_del_sackholes(struct tcpcb *, struct tcphdr *);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);