Prevent premature SACK block transmission during loss recovery
Under specific conditions, a window update can be sent with outdated SACK information. Some clients react to this by subsequently delaying loss recovery, making TCP perform very poorly. Reported by: chengc_netapp.com Reviewed by: rrs, jtl MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D24237
This commit is contained in:
parent
2d39824195
commit
4d0770f172
@ -1461,6 +1461,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
return (newsize);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
|
||||
{
|
||||
/*
|
||||
* Since tp might be gone if the session entered
|
||||
* the TIME_WAIT state before coming here, we need
|
||||
* to check if the socket is still connected.
|
||||
*/
|
||||
if ((so->so_state & SS_ISCONNECTED) == 0)
|
||||
return;
|
||||
INP_LOCK_ASSERT(tp->t_inpcb);
|
||||
if (tp->t_flags & TF_WAKESOR) {
|
||||
tp->t_flags &= ~TF_WAKESOR;
|
||||
SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
|
||||
sorwakeup(so);
|
||||
}
|
||||
if (tp->t_flags & TF_WAKESOW) {
|
||||
tp->t_flags &= ~TF_WAKESOW;
|
||||
SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
|
||||
sowwakeup(so);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
|
||||
@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
else if (!tcp_timer_active(tp, TT_PERSIST))
|
||||
tcp_timer_activate(tp, TT_REXMT,
|
||||
tp->t_rxtcur);
|
||||
sowwakeup(so);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
if (sbavail(&so->so_snd))
|
||||
(void) tp->t_fb->tfb_tcp_output(tp);
|
||||
goto check_delack;
|
||||
@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
m_adj(m, drop_hdrlen); /* delayed header drop */
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
}
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
if (DELAY_ACK(tp, tlen)) {
|
||||
tp->t_flags |= TF_DELACK;
|
||||
} else {
|
||||
@ -2811,8 +2834,8 @@ process_ACK:
|
||||
tp->snd_wnd = 0;
|
||||
ourfinisacked = 0;
|
||||
}
|
||||
/* NB: sowwakeup_locked() does an implicit unlock. */
|
||||
sowwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_snd);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
m_freem(mfree);
|
||||
/* Detect una wraparound. */
|
||||
if (!IN_RECOVERY(tp->t_flags) &&
|
||||
@ -3033,8 +3056,8 @@ dodata: /* XXX */
|
||||
m_freem(m);
|
||||
else
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
} else {
|
||||
/*
|
||||
* XXX: Due to the header drop above "th" is
|
||||
@ -3101,6 +3124,8 @@ dodata: /* XXX */
|
||||
if (thflags & TH_FIN) {
|
||||
if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
|
||||
socantrcvmore(so);
|
||||
/* The socket upcall is handled by socantrcvmore. */
|
||||
tp->t_flags &= ~TF_WAKESOR;
|
||||
/*
|
||||
* If connection is half-synchronized
|
||||
* (ie NEEDSYN flag on) then delay ACK,
|
||||
@ -3164,6 +3189,7 @@ check_delack:
|
||||
tp->t_flags &= ~TF_DELACK;
|
||||
tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
|
||||
}
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
return;
|
||||
|
||||
@ -3197,6 +3223,7 @@ dropafterack:
|
||||
TCP_PROBE3(debug__input, tp, th, m);
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
(void) tp->t_fb->tfb_tcp_output(tp);
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
m_freem(m);
|
||||
return;
|
||||
@ -3204,6 +3231,7 @@ dropafterack:
|
||||
dropwithreset:
|
||||
if (tp != NULL) {
|
||||
tcp_dropwithreset(m, th, tp, tlen, rstreason);
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
} else
|
||||
tcp_dropwithreset(m, th, NULL, tlen, rstreason);
|
||||
@ -3219,8 +3247,10 @@ drop:
|
||||
&tcp_savetcp, 0);
|
||||
#endif
|
||||
TCP_PROBE3(debug__input, tp, th, m);
|
||||
if (tp != NULL)
|
||||
if (tp != NULL) {
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
}
|
||||
m_freem(m);
|
||||
}
|
||||
|
||||
|
@ -959,7 +959,8 @@ new_entry:
|
||||
} else {
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
}
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
return (flags);
|
||||
}
|
||||
if (tcp_new_limits) {
|
||||
@ -1107,6 +1108,7 @@ present:
|
||||
#ifdef TCP_REASS_LOGGING
|
||||
tcp_reass_log_dump(tp);
|
||||
#endif
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
return (flags);
|
||||
}
|
||||
|
@ -7876,8 +7876,8 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
acked_amount = min(acked, (int)sbavail(&so->so_snd));
|
||||
tp->snd_wnd -= acked_amount;
|
||||
mfree = sbcut_locked(&so->so_snd, acked_amount);
|
||||
/* NB: sowwakeup_locked() does an implicit unlock. */
|
||||
sowwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_snd);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
m_freem(mfree);
|
||||
if (SEQ_GT(th->th_ack, tp->snd_una)) {
|
||||
bbr_collapse_rtt(tp, bbr, TCP_REXMTVAL(tp));
|
||||
@ -8353,8 +8353,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
appended =
|
||||
#endif
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
if (so->so_rcv.sb_shlim && appended != mcnt)
|
||||
counter_fo_release(so->so_rcv.sb_shlim,
|
||||
@ -8414,6 +8414,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
if (thflags & TH_FIN) {
|
||||
if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
|
||||
socantrcvmore(so);
|
||||
/* The socket upcall is handled by socantrcvmore. */
|
||||
tp->t_flags &= ~TF_WAKESOR;
|
||||
/*
|
||||
* If connection is half-synchronized (ie NEEDSYN
|
||||
* flag on) then delay ACK, so it may be piggybacked
|
||||
@ -8604,8 +8606,8 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
ctf_calc_rwin(so, tp);
|
||||
}
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
if (so->so_rcv.sb_shlim && mcnt != appended)
|
||||
counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
|
||||
@ -8796,7 +8798,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
&tcp_savetcp, 0);
|
||||
#endif
|
||||
/* Wake up the socket if we have room to write more */
|
||||
sowwakeup(so);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
if (tp->snd_una == tp->snd_max) {
|
||||
/* Nothing left outstanding */
|
||||
bbr_log_progress_event(bbr, tp, ticks, PROGRESS_CLEAR, __LINE__);
|
||||
@ -11740,8 +11742,10 @@ bbr_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
}
|
||||
retval = bbr_do_segment_nounlock(m, th, so, tp,
|
||||
drop_hdrlen, tlen, iptos, 0, &tv);
|
||||
if (retval == 0)
|
||||
if (retval == 0) {
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -8344,8 +8344,8 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
*/
|
||||
ourfinisacked = 1;
|
||||
}
|
||||
/* NB: sowwakeup_locked() does an implicit unlock. */
|
||||
sowwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_snd);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
m_freem(mfree);
|
||||
if (rack->r_ctl.rc_early_recovery == 0) {
|
||||
if (IN_RECOVERY(tp->t_flags)) {
|
||||
@ -8665,8 +8665,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
appended =
|
||||
#endif
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
if (so->so_rcv.sb_shlim && appended != mcnt)
|
||||
counter_fo_release(so->so_rcv.sb_shlim,
|
||||
@ -8731,6 +8731,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
if (thflags & TH_FIN) {
|
||||
if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
|
||||
socantrcvmore(so);
|
||||
/* The socket upcall is handled by socantrcvmore. */
|
||||
tp->t_flags &= ~TF_WAKESOR;
|
||||
/*
|
||||
* If connection is half-synchronized (ie NEEDSYN
|
||||
* flag on) then delay ACK, so it may be piggybacked
|
||||
@ -8922,8 +8924,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
sbappendstream_locked(&so->so_rcv, m, 0);
|
||||
ctf_calc_rwin(so, tp);
|
||||
}
|
||||
/* NB: sorwakeup_locked() does an implicit unlock. */
|
||||
sorwakeup_locked(so);
|
||||
SOCKBUF_UNLOCK(&so->so_rcv);
|
||||
tp->t_flags |= TF_WAKESOR;
|
||||
#ifdef NETFLIX_SB_LIMITS
|
||||
if (so->so_rcv.sb_shlim && mcnt != appended)
|
||||
counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
|
||||
@ -9140,7 +9142,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
|
||||
}
|
||||
/* Wake up the socket if we have room to write more */
|
||||
sowwakeup(so);
|
||||
tp->t_flags |= TF_WAKESOW;
|
||||
if (sbavail(&so->so_snd)) {
|
||||
rack->r_wanted_output = 1;
|
||||
}
|
||||
@ -11188,8 +11190,10 @@ rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
|
||||
tcp_get_usecs(&tv);
|
||||
}
|
||||
if(rack_do_segment_nounlock(m, th, so, tp,
|
||||
drop_hdrlen, tlen, iptos, 0, &tv) == 0)
|
||||
drop_hdrlen, tlen, iptos, 0, &tv) == 0) {
|
||||
tcp_handle_wakeup(tp, so);
|
||||
INP_WUNLOCK(tp->t_inpcb);
|
||||
}
|
||||
}
|
||||
|
||||
struct rack_sendmap *
|
||||
|
@ -458,6 +458,7 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt)
|
||||
/* We lost the tcpcb (maybe a RST came in)? */
|
||||
return(1);
|
||||
}
|
||||
tcp_handle_wakeup(tp, so);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
@ -381,7 +381,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
|
||||
#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */
|
||||
#define TF_NOPUSH 0x00001000 /* don't push */
|
||||
#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */
|
||||
#define TF_UNUSED1 0x00004000 /* unused */
|
||||
#define TF_WAKESOR 0x00004000 /* wake up receive socket */
|
||||
#define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */
|
||||
#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
|
||||
#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
|
||||
@ -393,9 +393,9 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
|
||||
#define TF_FORCEDATA 0x00800000 /* force out a byte */
|
||||
#define TF_TSO 0x01000000 /* TSO enabled on this connection */
|
||||
#define TF_TOE 0x02000000 /* this connection is offloaded */
|
||||
#define TF_UNUSED3 0x04000000 /* unused */
|
||||
#define TF_UNUSED4 0x08000000 /* unused */
|
||||
#define TF_UNUSED5 0x10000000 /* unused */
|
||||
#define TF_WAKESOW 0x04000000 /* wake up send socket */
|
||||
#define TF_UNUSED1 0x08000000 /* unused */
|
||||
#define TF_UNUSED2 0x10000000 /* unused */
|
||||
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
|
||||
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
|
||||
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
|
||||
@ -931,7 +931,8 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
|
||||
const void *);
|
||||
char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
|
||||
const void *);
|
||||
int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *);
|
||||
int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *,
|
||||
struct mbuf *);
|
||||
void tcp_reass_global_init(void);
|
||||
void tcp_reass_flush(struct tcpcb *);
|
||||
void tcp_dooptions(struct tcpopt *, u_char *, int, int);
|
||||
@ -955,6 +956,7 @@ void hhook_run_tcp_est_in(struct tcpcb *tp,
|
||||
int tcp_input(struct mbuf **, int *, int);
|
||||
int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
|
||||
struct tcpcb *, int);
|
||||
void tcp_handle_wakeup(struct tcpcb *, struct socket *);
|
||||
void tcp_do_segment(struct mbuf *, struct tcphdr *,
|
||||
struct socket *, struct tcpcb *, int, int, uint8_t);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user