diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index c0a3d9600ca5..cb32e2f5d27c 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1461,6 +1461,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, return (newsize); } +void +tcp_handle_wakeup(struct tcpcb *tp, struct socket *so) +{ + /* + * Since tp might be gone if the session entered + * the TIME_WAIT state before coming here, we need + * to check if the socket is still connected. + */ + if ((so->so_state & SS_ISCONNECTED) == 0) + return; + INP_LOCK_ASSERT(tp->t_inpcb); + if (tp->t_flags & TF_WAKESOR) { + tp->t_flags &= ~TF_WAKESOR; + SOCKBUF_UNLOCK_ASSERT(&so->so_rcv); + sorwakeup(so); + } + if (tp->t_flags & TF_WAKESOW) { + tp->t_flags &= ~TF_WAKESOW; + SOCKBUF_UNLOCK_ASSERT(&so->so_snd); + sowwakeup(so); + } +} + void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) @@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); - sowwakeup(so); + tp->t_flags |= TF_WAKESOW; if (sbavail(&so->so_snd)) (void) tp->t_fb->tfb_tcp_output(tp); goto check_delack; @@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m, 0); } - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; if (DELAY_ACK(tp, tlen)) { tp->t_flags |= TF_DELACK; } else { @@ -2811,8 +2834,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->snd_wnd = 0; ourfinisacked = 0; } - /* NB: sowwakeup_locked() does an implicit unlock. */ - sowwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_snd); + tp->t_flags |= TF_WAKESOW; m_freem(mfree); /* Detect una wraparound. */ if (!IN_RECOVERY(tp->t_flags) && @@ -3033,8 +3056,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, m_freem(m); else sbappendstream_locked(&so->so_rcv, m, 0); - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; } else { /* * XXX: Due to the header drop above "th" is @@ -3101,6 +3124,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); + /* The socket upcall is handled by socantrcvmore. */ + tp->t_flags &= ~TF_WAKESOR; /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, @@ -3164,6 +3189,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); return; @@ -3197,6 +3223,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, TCP_PROBE3(debug__input, tp, th, m); tp->t_flags |= TF_ACKNOW; (void) tp->t_fb->tfb_tcp_output(tp); + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); m_freem(m); return; @@ -3204,6 +3231,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, dropwithreset: if (tp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); @@ -3219,8 +3247,10 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, &tcp_savetcp, 0); #endif TCP_PROBE3(debug__input, tp, th, m); - if (tp != NULL) + if (tp != NULL) { + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); + } m_freem(m); } diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index d57cb4318a94..310d7d540507 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -959,7 +959,8 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start, } else { sbappendstream_locked(&so->so_rcv, m, 0); } - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; return (flags); } if (tcp_new_limits) { @@ -1107,6 +1108,7 @@ tcp_reass(struct tcpcb *tp, struct tcphdr *th, tcp_seq *seq_start, #ifdef TCP_REASS_LOGGING tcp_reass_log_dump(tp); #endif - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; return (flags); } diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 2e74ecbe8ea1..5114ef4d2ba3 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -7876,8 +7876,8 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, acked_amount = min(acked, (int)sbavail(&so->so_snd)); tp->snd_wnd -= acked_amount; mfree = sbcut_locked(&so->so_snd, acked_amount); - /* NB: sowwakeup_locked() does an implicit unlock. */ - sowwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_snd); + tp->t_flags |= TF_WAKESOW; m_freem(mfree); if (SEQ_GT(th->th_ack, tp->snd_una)) { bbr_collapse_rtt(tp, bbr, TCP_REXMTVAL(tp)); @@ -8353,8 +8353,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, appended = #endif sbappendstream_locked(&so->so_rcv, m, 0); - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; #ifdef NETFLIX_SB_LIMITS if (so->so_rcv.sb_shlim && appended != mcnt) counter_fo_release(so->so_rcv.sb_shlim, @@ -8414,6 +8414,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); + /* The socket upcall is handled by socantrcvmore. */ + tp->t_flags &= ~TF_WAKESOR; /* * If connection is half-synchronized (ie NEEDSYN * flag on) then delay ACK, so it may be piggybacked @@ -8604,8 +8606,8 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, sbappendstream_locked(&so->so_rcv, m, 0); ctf_calc_rwin(so, tp); } - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; #ifdef NETFLIX_SB_LIMITS if (so->so_rcv.sb_shlim && mcnt != appended) counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended); @@ -8796,7 +8798,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, &tcp_savetcp, 0); #endif /* Wake up the socket if we have room to write more */ - sowwakeup(so); + tp->t_flags |= TF_WAKESOW; if (tp->snd_una == tp->snd_max) { /* Nothing left outstanding */ bbr_log_progress_event(bbr, tp, ticks, PROGRESS_CLEAR, __LINE__); @@ -11740,8 +11742,10 @@ bbr_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } retval = bbr_do_segment_nounlock(m, th, so, tp, drop_hdrlen, tlen, iptos, 0, &tv); - if (retval == 0) + if (retval == 0) { + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); + } } /* diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 635e932bb80e..7523a98db4c5 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -8344,8 +8344,8 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so, */ ourfinisacked = 1; } - /* NB: sowwakeup_locked() does an implicit unlock. */ - sowwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_snd); + tp->t_flags |= TF_WAKESOW; m_freem(mfree); if (rack->r_ctl.rc_early_recovery == 0) { if (IN_RECOVERY(tp->t_flags)) { @@ -8665,8 +8665,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, appended = #endif sbappendstream_locked(&so->so_rcv, m, 0); - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; #ifdef NETFLIX_SB_LIMITS if (so->so_rcv.sb_shlim && appended != mcnt) counter_fo_release(so->so_rcv.sb_shlim, @@ -8731,6 +8731,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, struct socket *so, if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); + /* The socket upcall is handled by socantrcvmore. */ + tp->t_flags &= ~TF_WAKESOR; /* * If connection is half-synchronized (ie NEEDSYN * flag on) then delay ACK, so it may be piggybacked @@ -8922,8 +8924,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so, sbappendstream_locked(&so->so_rcv, m, 0); ctf_calc_rwin(so, tp); } - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(&so->so_rcv); + tp->t_flags |= TF_WAKESOR; #ifdef NETFLIX_SB_LIMITS if (so->so_rcv.sb_shlim && mcnt != appended) counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended); @@ -9140,7 +9142,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__); } /* Wake up the socket if we have room to write more */ - sowwakeup(so); + tp->t_flags |= TF_WAKESOW; if (sbavail(&so->so_snd)) { rack->r_wanted_output = 1; } @@ -11188,8 +11190,10 @@ rack_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_get_usecs(&tv); } if(rack_do_segment_nounlock(m, th, so, tp, - drop_hdrlen, tlen, iptos, 0, &tv) == 0) + drop_hdrlen, tlen, iptos, 0, &tv) == 0) { + tcp_handle_wakeup(tp, so); INP_WUNLOCK(tp->t_inpcb); + } } struct rack_sendmap * diff --git a/sys/netinet/tcp_stacks/rack_bbr_common.c b/sys/netinet/tcp_stacks/rack_bbr_common.c index b7d758f5fcd8..e73a3e60fd64 100644 --- a/sys/netinet/tcp_stacks/rack_bbr_common.c +++ b/sys/netinet/tcp_stacks/rack_bbr_common.c @@ -458,6 +458,7 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb *tp, int have_pkt) /* We lost the tcpcb (maybe a RST came in)? */ return(1); } + tcp_handle_wakeup(tp, so); } return (0); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5fe0d4a07d60..5600d52170fe 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -381,7 +381,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function); #define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */ #define TF_NOPUSH 0x00001000 /* don't push */ #define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */ -#define TF_UNUSED1 0x00004000 /* unused */ +#define TF_WAKESOR 0x00004000 /* wake up receive socket */ #define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */ #define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */ @@ -393,9 +393,9 @@ TAILQ_HEAD(tcp_funchead, tcp_function); #define TF_FORCEDATA 0x00800000 /* force out a byte */ #define TF_TSO 0x01000000 /* TSO enabled on this connection */ #define TF_TOE 0x02000000 /* this connection is offloaded */ -#define TF_UNUSED3 0x04000000 /* unused */ -#define TF_UNUSED4 0x08000000 /* unused */ -#define TF_UNUSED5 0x10000000 /* unused */ +#define TF_WAKESOW 0x04000000 /* wake up send socket */ +#define TF_UNUSED1 0x08000000 /* unused */ +#define TF_UNUSED2 0x10000000 /* unused */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ @@ -931,7 +931,8 @@ char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, const void *); char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *, const void *); -int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *); +int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, + struct mbuf *); void tcp_reass_global_init(void); void tcp_reass_flush(struct tcpcb *); void tcp_dooptions(struct tcpopt *, u_char *, int, int); @@ -955,6 +956,7 @@ void hhook_run_tcp_est_in(struct tcpcb *tp, int tcp_input(struct mbuf **, int *, int); int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int); +void tcp_handle_wakeup(struct tcpcb *, struct socket *); void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t);