Fix a bug with transmitter restart after receiving a 0 window. The
receiver was not sending an immediate ack with delayed acks turned on
when the input buffer was drained, preventing the transmitter from
restarting immediately.

Propagate the TCP_NODELAY option to accept()ed sockets.  (Helps tbench and
is a good idea anyway).

Some cleanup.  Identify additional issues in comments.

MFC after:	1 day
This commit is contained in:
Matthew Dillon 2001-12-02 08:49:29 +00:00
parent fce6fbfa4d
commit 262c1c1a4e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=87193
5 changed files with 56 additions and 19 deletions

View File

@ -152,10 +152,15 @@ do { \
#endif
/*
* Indicate whether this ack should be delayed.
* Indicate whether this ack should be delayed. We can delay the ack if
* - delayed acks are enabled and
* - there is no delayed ack timer in progress and
* - our last ack wasn't a 0-sized window. We never want to delay
* the ack that opens up a 0-sized window.
*/
#define DELAY_ACK(tp) \
(tcp_delack_enabled && !callout_pending(tp->tt_delack))
(tcp_delack_enabled && !callout_pending(tp->tt_delack) && \
(tp->t_flags & TF_RXWIN0SENT) == 0)
static int
tcp_reass(tp, th, tlenp, m)

View File

@ -118,7 +118,9 @@ tcp_output(tp)
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen;
int idle, sendalot;
#if 0
int maxburst = TCP_MAXBURST;
#endif
struct rmxp_tao *taop;
struct rmxp_tao tao_noncached;
#ifdef INET6
@ -277,28 +279,38 @@ tcp_output(tp)
win = sbspace(&so->so_rcv);
/*
* Sender silly window avoidance. If connection is idle
* and can send all data, a maximum segment,
* at least a maximum default-size segment do it,
* or are forced, do it; otherwise don't bother.
* If peer's buffer is tiny, then send
* when window is at least half open.
* If retransmitting (possibly after persist timer forced us
* to send into a small window), then must resend.
* Sender silly window avoidance. We transmit under the following
* conditions when len is non-zero:
*
* - We have a full segment
* - This is the last buffer in a write()/send() and we are
* either idle or running NODELAY
* - we've timed out (e.g. persist timer)
* - we have more than 1/2 the maximum send window's worth of
* data (receiver may have limited the window size)
* - we need to retransmit
*/
if (len) {
if (len == tp->t_maxseg)
goto send;
if (!(tp->t_flags & TF_MORETOCOME) &&
(idle || tp->t_flags & TF_NODELAY) &&
(tp->t_flags & TF_NOPUSH) == 0 &&
len + off >= so->so_snd.sb_cc)
/*
* NOTE! on localhost connections an 'ack' from the remote
* end may occur synchronously with the output and cause
* us to flush a buffer queued with moretocome. XXX
*
* note: the len + off check is almost certainly unnecessary.
*/
if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
(idle || (tp->t_flags & TF_NODELAY)) &&
len + off >= so->so_snd.sb_cc &&
(tp->t_flags & TF_NOPUSH) == 0) {
goto send;
if (tp->t_force)
}
if (tp->t_force) /* typ. timeout case */
goto send;
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
goto send;
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
if (SEQ_LT(tp->snd_nxt, tp->snd_max)) /* retransmit case */
goto send;
}
@ -697,6 +709,20 @@ tcp_output(tp)
if (win > (long)TCP_MAXWIN << tp->rcv_scale)
win = (long)TCP_MAXWIN << tp->rcv_scale;
th->th_win = htons((u_short) (win>>tp->rcv_scale));
/*
* Adjust the RXWIN0SENT flag - indicate that we have advertised
* a 0 window. This may cause the remote transmitter to stall. This
* flag tells soreceive() to disable delayed acknowledgements when
* draining the buffer. This can occur if the receiver is attempting
* to read more data than can be buffered prior to transmitting on
* the connection.
*/
if (win == 0)
tp->t_flags |= TF_RXWIN0SENT;
else
tp->t_flags &= ~TF_RXWIN0SENT;
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
th->th_flags |= TH_URG;

View File

@ -152,10 +152,15 @@ do { \
#endif
/*
* Indicate whether this ack should be delayed.
* Indicate whether this ack should be delayed. We can delay the ack if
* - delayed acks are enabled and
* - there is no delayed ack timer in progress and
* - our last ack wasn't a 0-sized window. We never want to delay
* the ack that opens up a 0-sized window.
*/
#define DELAY_ACK(tp) \
(tcp_delack_enabled && !callout_pending(tp->tt_delack))
(tcp_delack_enabled && !callout_pending(tp->tt_delack) && \
(tp->t_flags & TF_RXWIN0SENT) == 0)
static int
tcp_reass(tp, th, tlenp, m)

View File

@ -657,7 +657,7 @@ syncache_socket(sc, lso)
tp->rcv_wnd = sc->sc_wnd;
tp->rcv_adv += tp->rcv_wnd;
tp->t_flags = sc->sc_tp->t_flags & TF_NOPUSH;
tp->t_flags = sc->sc_tp->t_flags & (TF_NOPUSH|TF_NODELAY);
if (sc->sc_flags & SCF_NOOPT)
tp->t_flags |= TF_NOOPT;
if (sc->sc_flags & SCF_WINSCALE) {

View File

@ -101,6 +101,7 @@ struct tcpcb {
#define TF_MORETOCOME 0x10000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x20000 /* listen queue overflow */
#define TF_LASTIDLE 0x40000 /* connection was previously idle */
#define TF_RXWIN0SENT 0x80000 /* sent a receiver win 0 in response */
int t_force; /* 1 if forcing out a byte */
tcp_seq snd_una; /* send unacknowledged */