Rework TCP window scaling (RFC1323) to properly scale the send window
right from the beginning, and partly clean up the differences in handling
between SYN_SENT and SYN_RCVD (syncache).

Further changes to this code are to come.  This is a first incremental step
toward a general overhaul and streamlining of the TCP code.

PR:		kern/15095
PR:		kern/92690 (partly)
Reviewed by:	qingli (and tested with ANVL)
Sponsored by:	TCP/IP Optimization Fundraise 2005
This commit is contained in:
Andre Oppermann 2006-02-28 23:05:59 +00:00
parent 215e7c161a
commit 464fcfbc5c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=156125
4 changed files with 42 additions and 40 deletions

View File

@ -778,12 +778,6 @@ tcp_input(m, off0)
if (tp->t_state == TCPS_CLOSED)
goto drop;
/* Unscale the window into a 32-bit value. */
if ((thflags & TH_SYN) == 0)
tiwin = th->th_win << tp->snd_scale;
else
tiwin = th->th_win;
#ifdef MAC
INP_LOCK_ASSERT(inp);
if (mac_check_inpcb_deliver(inp, m))
@ -842,7 +836,7 @@ tcp_input(m, off0)
/*
* Could not complete 3-way handshake,
* connection is being closed down, and
* syncache will free mbuf.
* syncache has free'd mbuf.
*/
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
@ -863,11 +857,6 @@ tcp_input(m, off0)
tp->snd_up = tp->snd_una;
tp->snd_max = tp->snd_nxt = tp->iss + 1;
tp->last_ack_sent = tp->rcv_nxt;
/*
* RFC1323: The window in SYN & SYN/ACK
* segments is never scaled.
*/
tp->snd_wnd = tiwin; /* unscaled */
goto after_listen;
}
if (thflags & TH_RST) {
@ -988,12 +977,12 @@ tcp_input(m, off0)
}
/*
* Segment passed TAO tests.
* XXX: Can't happen at the moment.
*/
INP_UNLOCK(inp);
inp = sotoinpcb(so);
INP_LOCK(inp);
tp = intotcpcb(inp);
tp->snd_wnd = tiwin;
tp->t_starttime = ticks;
tp->t_state = TCPS_ESTABLISHED;
@ -1008,6 +997,7 @@ tcp_input(m, off0)
tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
else
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tiwin = th->th_win << tp->snd_scale;
tcpstat.tcps_connects++;
soisconnected(so);
goto trimthenstep6;
@ -1087,6 +1077,13 @@ tcp_input(m, off0)
if (TCPS_HAVEESTABLISHED(tp->t_state))
callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
/*
* Unscale the window into a 32-bit value.
* This value is bogus for the TCPS_SYN_SENT state
* and is overwritten later.
*/
tiwin = th->th_win << tp->snd_scale;
/*
* Process options only when we get SYN/ACK back. The SYN case
* for incoming connections is handled in tcp_syncache.
@ -1094,9 +1091,12 @@ tcp_input(m, off0)
*/
tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
if (to.to_flags & TOF_SCALE) {
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
tp->requested_s_scale = to.to_requested_s_scale;
tp->snd_scale = to.to_requested_s_scale;
tp->snd_wnd = th->th_win << tp->snd_scale;
tiwin = tp->snd_wnd;
}
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
@ -1365,7 +1365,9 @@ tcp_input(m, off0)
}
if ((thflags & TH_SYN) == 0)
goto drop;
tp->snd_wnd = th->th_win; /* initial send window */
/* Initial send window, already scaled. */
tp->snd_wnd = th->th_win;
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
@ -1380,7 +1382,6 @@ tcp_input(m, off0)
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
tp->rcv_adv += tp->rcv_wnd;
@ -1793,8 +1794,8 @@ tcp_input(m, off0)
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
tp->snd_wnd = tiwin;
}
/*
* Make transitions:
@ -2035,8 +2036,8 @@ tcp_input(m, off0)
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
/* Send window already scaled. */
}
}

View File

@ -778,12 +778,6 @@ tcp_input(m, off0)
if (tp->t_state == TCPS_CLOSED)
goto drop;
/* Unscale the window into a 32-bit value. */
if ((thflags & TH_SYN) == 0)
tiwin = th->th_win << tp->snd_scale;
else
tiwin = th->th_win;
#ifdef MAC
INP_LOCK_ASSERT(inp);
if (mac_check_inpcb_deliver(inp, m))
@ -842,7 +836,7 @@ tcp_input(m, off0)
/*
* Could not complete 3-way handshake,
* connection is being closed down, and
* syncache will free mbuf.
* syncache has free'd mbuf.
*/
INP_UNLOCK(inp);
INP_INFO_WUNLOCK(&tcbinfo);
@ -863,11 +857,6 @@ tcp_input(m, off0)
tp->snd_up = tp->snd_una;
tp->snd_max = tp->snd_nxt = tp->iss + 1;
tp->last_ack_sent = tp->rcv_nxt;
/*
* RFC1323: The window in SYN & SYN/ACK
* segments is never scaled.
*/
tp->snd_wnd = tiwin; /* unscaled */
goto after_listen;
}
if (thflags & TH_RST) {
@ -988,12 +977,12 @@ tcp_input(m, off0)
}
/*
* Segment passed TAO tests.
* XXX: Can't happen at the moment.
*/
INP_UNLOCK(inp);
inp = sotoinpcb(so);
INP_LOCK(inp);
tp = intotcpcb(inp);
tp->snd_wnd = tiwin;
tp->t_starttime = ticks;
tp->t_state = TCPS_ESTABLISHED;
@ -1008,6 +997,7 @@ tcp_input(m, off0)
tp->t_flags |= (TF_DELACK | TF_NEEDSYN);
else
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tiwin = th->th_win << tp->snd_scale;
tcpstat.tcps_connects++;
soisconnected(so);
goto trimthenstep6;
@ -1087,6 +1077,13 @@ tcp_input(m, off0)
if (TCPS_HAVEESTABLISHED(tp->t_state))
callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
/*
* Unscale the window into a 32-bit value.
* This value is bogus for the TCPS_SYN_SENT state
* and is overwritten later.
*/
tiwin = th->th_win << tp->snd_scale;
/*
* Process options only when we get SYN/ACK back. The SYN case
* for incoming connections is handled in tcp_syncache.
@ -1094,9 +1091,12 @@ tcp_input(m, off0)
*/
tcp_dooptions(&to, optp, optlen, thflags & TH_SYN);
if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
if (to.to_flags & TOF_SCALE) {
if ((to.to_flags & TOF_SCALE) &&
(tp->t_flags & TF_REQ_SCALE)) {
tp->t_flags |= TF_RCVD_SCALE;
tp->requested_s_scale = to.to_requested_s_scale;
tp->snd_scale = to.to_requested_s_scale;
tp->snd_wnd = th->th_win << tp->snd_scale;
tiwin = tp->snd_wnd;
}
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
@ -1365,7 +1365,9 @@ tcp_input(m, off0)
}
if ((thflags & TH_SYN) == 0)
goto drop;
tp->snd_wnd = th->th_win; /* initial send window */
/* Initial send window, already scaled. */
tp->snd_wnd = th->th_win;
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
@ -1380,7 +1382,6 @@ tcp_input(m, off0)
/* Do window scaling on this connection? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
}
tp->rcv_adv += tp->rcv_wnd;
@ -1793,8 +1794,8 @@ tcp_input(m, off0)
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
tp->snd_wnd = tiwin;
}
/*
* Make transitions:
@ -2035,8 +2036,8 @@ tcp_input(m, off0)
/* Do window scaling? */
if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
(TF_RCVD_SCALE|TF_REQ_SCALE)) {
tp->snd_scale = tp->requested_s_scale;
tp->rcv_scale = tp->request_r_scale;
/* Send window already scaled. */
}
}

View File

@ -682,7 +682,7 @@ syncache_socket(sc, lso, m)
tp->t_flags |= TF_NOOPT;
if (sc->sc_flags & SCF_WINSCALE) {
tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
tp->requested_s_scale = sc->sc_requested_s_scale;
tp->snd_scale = sc->sc_requested_s_scale;
tp->request_r_scale = sc->sc_request_r_scale;
}
if (sc->sc_flags & SCF_TIMESTAMP) {

View File

@ -173,7 +173,7 @@ struct tcpcb {
u_char snd_scale; /* window scaling for send window */
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
u_char requested_s_scale;
u_char requested_s_scale; /* unused, to be reused later */
u_long ts_recent; /* timestamp echo data */
u_long ts_recent_age; /* when last updated */