tcp: TCP output method can request tcp_drop

The advanced TCP stacks (bbr, rack) may decide to drop a TCP connection
when they do output on it.  The default stack never does this, thus
existing framework expects tcp_output() always to return locked and
valid tcpcb.

Provide KPI extension to satisfy demands of advanced stacks.  If the
output method returns negative error code, it means that caller must
call tcp_drop().

In tcp_var() provide three inline methods to call tcp_output():
- tcp_output() is a drop-in replacement for the default stack, so that
  default stack can continue using it internally without modifications.
  For advanced stacks it would perform tcp_drop() and unlock and report
  that with negative error code.
- tcp_output_unlock() handles the negative code and always converts
  it to positive and always unlocks.
- tcp_output_nodrop() just calls the method and leaves the responsibility
  to drop on the caller.

Sweep over the advanced stacks and use new KPI instead of using HPTS
delayed drop queue for that.

Reviewed by:		rrs, tuexen
Differential revision:	https://reviews.freebsd.org/D33370
This commit is contained in:
Gleb Smirnoff 2021-12-26 08:48:19 -08:00
parent dbbcc777de
commit f64dc2ab5b
9 changed files with 210 additions and 88 deletions

View File

@ -2300,6 +2300,12 @@ t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
if (moretocome)
tp->t_flags |= TF_MORETOCOME;
error = tcp_output(tp);
if (error < 0) {
INP_UNLOCK_ASSERT(inp);
SOCK_IO_SEND_UNLOCK(so);
error = -error;
goto out;
}
if (moretocome)
tp->t_flags &= ~TF_MORETOCOME;
}

View File

@ -1550,6 +1550,8 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
}
inp->inp_hpts_calls = 1;
error = tcp_output(tp);
if (error < 0)
goto skip_pacing;
inp->inp_hpts_calls = 0;
if (ninp && ninp->inp_ppcb) {
/*

View File

@ -4580,8 +4580,7 @@ bbr_timeout_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
}
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
/* Did we somehow get into persists? */
if (bbr->rc_in_persist) {
@ -4773,8 +4772,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
*/
if (ctf_progress_timeout_check(tp, true)) {
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
return (-ETIMEDOUT); /* tcp_drop() */
}
/*
* Hack: if the peer is dead/unreachable, we do not time out if the
@ -4787,8 +4785,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
return (-ETIMEDOUT); /* tcp_drop() */
}
if ((sbavail(&bbr->rc_inp->inp_socket->so_snd) == 0) &&
tp->snd_una == tp->snd_max) {
@ -4804,8 +4801,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
return (-ETIMEDOUT); /* tcp_drop() */
}
t_template = tcpip_maketemplate(bbr->rc_inp);
if (t_template) {
@ -4877,8 +4873,7 @@ bbr_timeout_keepalive(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
dropit:
KMOD_TCPSTAT_INC(tcps_keepdrops);
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
/*
@ -4998,10 +4993,8 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
* and retransmit one segment.
*/
if (ctf_progress_timeout_check(tp, true)) {
retval = 1;
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
return (-ETIMEDOUT); /* tcp_drop() */
}
bbr_remxt_tmr(tp);
if ((bbr->r_ctl.rc_resend == NULL) ||
@ -5017,11 +5010,11 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
if (tp->t_rxtshift > TCP_MAXRXTSHIFT) {
tp->t_rxtshift = TCP_MAXRXTSHIFT;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
retval = 1;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
tcp_set_inp_to_drop(bbr->rc_inp,
(tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT));
goto out;
/* XXXGL: previously t_softerror was casted to uint16_t */
MPASS(tp->t_softerror >= 0);
retval = tp->t_softerror ? -tp->t_softerror : -ETIMEDOUT;
return (retval); /* tcp_drop() */
}
if (tp->t_state == TCPS_SYN_SENT) {
/*
@ -5194,7 +5187,7 @@ bbr_timeout_rxt(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts)
tp->snd_recover = tp->snd_max;
tp->t_flags |= TF_ACKNOW;
tp->t_rtttime = 0;
out:
return (retval);
}
@ -11637,7 +11630,8 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (bbr->r_wanted_output != 0) {
bbr->rc_output_starts_timer = 0;
did_out = 1;
(void)tcp_output(tp);
if (tcp_output(tp) < 0)
return (1);
} else
bbr_start_hpts_timer(bbr, tp, cts, 6, 0, 0);
}
@ -11676,7 +11670,8 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
/* We are late */
bbr->r_ctl.rc_last_delay_val = 0;
BBR_STAT_INC(bbr_force_output);
(void)tcp_output(tp);
if (tcp_output(tp) < 0)
return (1);
}
}
}
@ -12163,9 +12158,16 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
hpts_calling = inp->inp_hpts_calls;
inp->inp_hpts_calls = 0;
if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
if (bbr_process_timers(tp, bbr, cts, hpts_calling)) {
int retval;
retval = bbr_process_timers(tp, bbr, cts, hpts_calling);
if (retval != 0) {
counter_u64_add(bbr_out_size[TCP_MSS_ACCT_ATIMER], 1);
return (0);
/*
* If timers want tcp_drop(), then pass error out,
* otherwise suppress it.
*/
return (retval < 0 ? retval : 0);
}
}
bbr->rc_inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY;
@ -13234,10 +13236,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
* is the only thing to do.
*/
BBR_STAT_INC(bbr_offset_drop);
tcp_set_inp_to_drop(inp, EFAULT);
SOCKBUF_UNLOCK(sb);
(void)m_free(m);
return (0);
return (-EFAULT); /* tcp_drop() */
}
len = rsm->r_end - rsm->r_start;
}
@ -13891,7 +13892,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
bbr->oerror_cnt++;
if (bbr_max_net_error_cnt && (bbr->oerror_cnt >= bbr_max_net_error_cnt)) {
/* drop the session */
tcp_set_inp_to_drop(inp, ENETDOWN);
return (-ENETDOWN);
}
switch (error) {
case ENOBUFS:
@ -14238,6 +14239,7 @@ struct tcp_function_block __tcp_bbr = {
.tfb_tcp_handoff_ok = bbr_handoff_ok,
.tfb_tcp_mtu_chg = bbr_mtu_chg,
.tfb_pru_options = bbr_pru_options,
.tfb_flags = TCP_FUNC_OUTPUT_CANDROP,
};
/*

View File

@ -6319,8 +6319,7 @@ rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8_t
}
if (ctf_progress_timeout_check(tp, true)) {
rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
/*
* A TLP timer has expired. We have been idle for 2 rtts. So we now
@ -6538,9 +6537,8 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
if (ctf_progress_timeout_check(tp, false)) {
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(inp, ETIMEDOUT);
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
/*
@ -6558,10 +6556,9 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
retval = 1;
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT);
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
retval = -ETIMEDOUT; /* tcp_drop() */
goto out;
}
if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) &&
@ -6574,11 +6571,10 @@ rack_timeout_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
*/
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
retval = 1;
KMOD_TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT);
counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends);
retval = -ETIMEDOUT; /* tcp_drop() */
goto out;
}
t_template = tcpip_maketemplate(rack->rc_inp);
@ -6669,8 +6665,7 @@ rack_timeout_keepalive(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
dropit:
KMOD_TCPSTAT_INC(tcps_keepdrops);
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
/*
@ -6874,8 +6869,7 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
if (ctf_progress_timeout_check(tp, false)) {
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(inp, ETIMEDOUT);
return (1);
return (-ETIMEDOUT); /* tcp_drop() */
}
rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT;
rack->r_ctl.retran_during_recovery = 0;
@ -6944,10 +6938,10 @@ rack_timeout_rxt(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
drop_it:
tp->t_rxtshift = TCP_MAXRXTSHIFT;
KMOD_TCPSTAT_INC(tcps_timeoutdrop);
retval = 1;
tcp_set_inp_to_drop(rack->rc_inp,
(tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT));
goto out;
/* XXXGL: previously t_softerror was casted to uint16_t */
MPASS(tp->t_softerror >= 0);
retval = tp->t_softerror ? -tp->t_softerror : -ETIMEDOUT;
goto out; /* tcp_drop() */
}
if (tp->t_state == TCPS_SYN_SENT) {
/*
@ -14164,7 +14158,12 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
ctf_calc_rwin(so, tp);
if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) {
send_out_a_rst:
(void)tcp_output(tp);
if (tcp_output(tp) < 0) {
#ifdef TCP_ACCOUNTING
sched_unpin();
#endif
return (1);
}
did_out = 1;
}
rack_free_trim(rack);
@ -14649,8 +14648,9 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so,
if (nxt_pkt == 0) {
if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) {
do_output_now:
if (tcp_output(tp) < 0)
return (1);
did_out = 1;
(void)tcp_output(tp);
}
rack_start_hpts_timer(rack, tp, cts, 0, 0, 0);
rack_free_trim(rack);
@ -16877,12 +16877,20 @@ rack_output(struct tcpcb *tp)
}
/* Do the timers, which may override the pacer */
if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
if (rack_process_timers(tp, rack, cts, hpts_calling, &doing_tlp)) {
int retval;
retval = rack_process_timers(tp, rack, cts, hpts_calling,
&doing_tlp);
if (retval != 0) {
counter_u64_add(rack_out_size[TCP_MSS_ACCT_ATIMER], 1);
#ifdef TCP_ACCOUNTING
sched_unpin();
#endif
return (0);
/*
* If timers want tcp_drop(), then pass error out,
* otherwise suppress it.
*/
return (retval < 0 ? retval : 0);
}
}
if (rack->rc_in_persist) {
@ -20393,6 +20401,7 @@ static struct tcp_function_block __tcp_rack = {
.tfb_tcp_mtu_chg = rack_mtu_change,
.tfb_pru_options = rack_pru_options,
.tfb_hwtls_change = rack_hw_tls_change,
.tfb_flags = TCP_FUNC_OUTPUT_CANDROP,
};
/*

View File

@ -375,7 +375,7 @@ static void tcp_default_fb_fini(struct tcpcb *tp, int tcb_is_purged);
static int tcp_default_handoff_ok(struct tcpcb *tp);
static struct inpcb *tcp_notify(struct inpcb *, int);
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
static void tcp_mtudisc(struct inpcb *, int);
static struct inpcb *tcp_mtudisc(struct inpcb *, int);
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
@ -2398,7 +2398,8 @@ tcp_drop(struct tcpcb *tp, int errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tcp_state_change(tp, TCPS_CLOSED);
(void) tcp_output(tp);
/* Don't use tcp_output() here due to possible recursion. */
(void)tcp_output_nodrop(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@ -3026,7 +3027,7 @@ tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port)
inc.inc_fibnum =
inp->inp_inc.inc_fibnum;
tcp_hc_updatemtu(&inc, mtu);
tcp_mtudisc(inp, mtu);
inp = tcp_mtudisc(inp, mtu);
}
} else
inp = (*notify)(inp,
@ -3474,11 +3475,10 @@ static struct inpcb *
tcp_mtudisc_notify(struct inpcb *inp, int error)
{
tcp_mtudisc(inp, -1);
return (inp);
return (tcp_mtudisc(inp, -1));
}
static void
static struct inpcb *
tcp_mtudisc(struct inpcb *inp, int mtuoffer)
{
struct tcpcb *tp;
@ -3487,7 +3487,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
(inp->inp_flags & INP_DROPPED))
return;
return (inp);
tp = intotcpcb(inp);
KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
@ -3517,7 +3517,10 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
*/
tp->t_fb->tfb_tcp_mtu_chg(tp);
}
tcp_output(tp);
if (tcp_output(tp) < 0)
return (NULL);
else
return (inp);
}
#ifdef INET

View File

@ -292,8 +292,7 @@ tcp_timer_delack(void *xtp)
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
NET_EPOCH_ENTER(et);
(void) tcp_output(tp);
INP_WUNLOCK(inp);
(void) tcp_output_unlock(tp);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
@ -502,6 +501,7 @@ tcp_timer_persist(void *xtp)
struct tcpcb *tp = xtp;
struct inpcb *inp;
struct epoch_tracker et;
int outrv;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@ -563,8 +563,7 @@ tcp_timer_persist(void *xtp)
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
NET_EPOCH_ENTER(et);
(void) tcp_output(tp);
NET_EPOCH_EXIT(et);
outrv = tcp_output_nodrop(tp);
tp->t_flags &= ~TF_FORCEDATA;
#ifdef TCPDEBUG
@ -572,7 +571,8 @@ tcp_timer_persist(void *xtp)
tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
INP_WUNLOCK(inp);
(void) tcp_unlock_or_drop(tp, outrv);
NET_EPOCH_EXIT(et);
out:
CURVNET_RESTORE();
}
@ -582,7 +582,7 @@ tcp_timer_rexmt(void * xtp)
{
struct tcpcb *tp = xtp;
CURVNET_SET(tp->t_vnet);
int rexmt;
int rexmt, outrv;
struct inpcb *inp;
struct epoch_tracker et;
bool isipv6;
@ -843,15 +843,15 @@ tcp_timer_rexmt(void * xtp)
cc_cong_signal(tp, NULL, CC_RTO);
NET_EPOCH_ENTER(et);
(void) tcp_output(tp);
NET_EPOCH_EXIT(et);
outrv = tcp_output_nodrop(tp);
#ifdef TCPDEBUG
if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
INP_WUNLOCK(inp);
(void) tcp_unlock_or_drop(tp, outrv);
NET_EPOCH_EXIT(et);
out:
CURVNET_RESTORE();
}

View File

@ -594,6 +594,8 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
#endif
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
error = tcp_output(tp);
KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()",
tp->t_fb->tfb_tcp_block_name, tp));
out_in_epoch:
NET_EPOCH_EXIT(et);
out:
@ -720,6 +722,8 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
#endif
NET_EPOCH_EXIT(et);
out:
KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()",
tp->t_fb->tfb_tcp_block_name, tp));
/*
* If the implicit bind in the connect call fails, restore
* the flags we modified.
@ -896,21 +900,20 @@ tcp_usr_shutdown(struct socket *so)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
tp = intotcpcb(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNRESET;
goto out;
}
tp = intotcpcb(inp);
TCPDEBUG1();
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
error = tcp_output(tp);
error = tcp_output_nodrop(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
error = tcp_unlock_or_drop(tp, error);
NET_EPOCH_EXIT(et);
return (error);
@ -925,17 +928,18 @@ tcp_usr_rcvd(struct socket *so, int flags)
struct epoch_tracker et;
struct inpcb *inp;
struct tcpcb *tp = NULL;
int error = 0;
int outrv = 0, error = 0;
TCPDEBUG0;
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
INP_WLOCK(inp);
NET_EPOCH_ENTER(et);
tp = intotcpcb(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNRESET;
goto out;
}
tp = intotcpcb(inp);
TCPDEBUG1();
/*
* For passively-created TFO connections, don't attempt a window
@ -947,18 +951,17 @@ tcp_usr_rcvd(struct socket *so, int flags)
if (IS_FASTOPEN(tp->t_flags) &&
(tp->t_state == TCPS_SYN_RECEIVED))
goto out;
NET_EPOCH_ENTER(et);
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
tcp_offload_rcvd(tp);
else
#endif
tcp_output(tp);
NET_EPOCH_EXIT(et);
outrv = tcp_output_nodrop(tp);
out:
TCPDEBUG2(PRU_RCVD);
TCP_PROBE2(debug__user, tp, PRU_RCVD);
INP_WUNLOCK(inp);
(void) tcp_unlock_or_drop(tp, outrv);
NET_EPOCH_EXIT(et);
return (error);
}
@ -999,6 +1002,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
tp = intotcpcb(inp);
vflagsav = inp->inp_vflag;
incflagsav = inp->inp_inc.inc_flags;
restoreflags = false;
@ -1018,7 +1022,6 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
m_freem(control); /* empty control, just free it */
control = NULL;
}
tp = intotcpcb(inp);
if ((flags & PRUS_OOB) != 0 &&
(error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
goto out;
@ -1188,7 +1191,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
!(flags & PRUS_NOTREADY)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
error = tcp_output(tp);
error = tcp_output_nodrop(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@ -1255,7 +1258,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
if ((flags & PRUS_NOTREADY) == 0) {
tp->t_flags |= TF_FORCEDATA;
error = tcp_output(tp);
error = tcp_output_nodrop(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
}
@ -1285,7 +1288,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
error = tcp_unlock_or_drop(tp, error);
NET_EPOCH_EXIT(et);
return (error);
}
@ -1310,12 +1313,13 @@ tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
SOCKBUF_LOCK(&so->so_snd);
error = sbready(&so->so_snd, m, count);
SOCKBUF_UNLOCK(&so->so_snd);
if (error == 0) {
NET_EPOCH_ENTER(et);
error = tcp_output(tp);
NET_EPOCH_EXIT(et);
if (error) {
INP_WUNLOCK(inp);
return (error);
}
INP_WUNLOCK(inp);
NET_EPOCH_ENTER(et);
error = tcp_output_unlock(tp);
NET_EPOCH_EXIT(et);
return (error);
}
@ -2238,7 +2242,7 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
struct epoch_tracker et;
NET_EPOCH_ENTER(et);
error = tcp_output(tp);
error = tcp_output_nodrop(tp);
NET_EPOCH_EXIT(et);
}
}
@ -2767,7 +2771,8 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
tcp_output(tp);
/* Ignore stack's drop request, we already at it. */
(void)tcp_output_nodrop(tp);
}
}

View File

@ -316,7 +316,8 @@ struct tcptemp {
* function below.
*/
/* Flags for tcp functions */
#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */
#define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */
/*
* If defining the optional tcp_timers, in the
@ -386,12 +387,107 @@ struct tcp_function {
TAILQ_HEAD(tcp_funchead, tcp_function);
struct tcpcb * tcp_drop(struct tcpcb *, int);
#ifdef _NETINET_IN_PCB_H_
/*
* tcp_output()
* Handles tcp_drop request from advanced stacks and reports that inpcb is
* gone with negative return code.
* Drop in replacement for the default stack.
*/
static inline int
tcp_output(struct tcpcb *tp)
{
int rv;
return (tp->t_fb->tfb_tcp_output(tp));
INP_WLOCK_ASSERT(tp->t_inpcb);
rv = tp->t_fb->tfb_tcp_output(tp);
if (rv < 0) {
KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
("TCP stack %s requested tcp_drop(%p)",
tp->t_fb->tfb_tcp_block_name, tp));
tp = tcp_drop(tp, -rv);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
}
return (rv);
}
/*
* tcp_output_unlock()
* Always returns unlocked, handles drop request from advanced stacks.
* Always returns positive error code.
*/
static inline int
tcp_output_unlock(struct tcpcb *tp)
{
int rv;
INP_WLOCK_ASSERT(tp->t_inpcb);
rv = tp->t_fb->tfb_tcp_output(tp);
if (rv < 0) {
KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
("TCP stack %s requested tcp_drop(%p)",
tp->t_fb->tfb_tcp_block_name, tp));
rv = -rv;
tp = tcp_drop(tp, rv);
if (tp)
INP_WUNLOCK(tp->t_inpcb);
} else
INP_WUNLOCK(tp->t_inpcb);
return (rv);
}
/*
* tcp_output_nodrop()
* Always returns locked. It is caller's responsibility to run tcp_drop()!
* Useful in syscall implementations, when we want to perform some logging
* and/or tracing with tcpcb before calling tcp_drop(). To be used with
* tcp_unlock_or_drop() later.
*
* XXXGL: maybe don't allow stacks to return a drop request at certain
* TCP states? Why would it do in connect(2)? In recv(2)?
*/
static inline int
tcp_output_nodrop(struct tcpcb *tp)
{
int rv;
INP_WLOCK_ASSERT(tp->t_inpcb);
rv = tp->t_fb->tfb_tcp_output(tp);
KASSERT(rv >= 0 || tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
("TCP stack %s requested tcp_drop(%p)",
tp->t_fb->tfb_tcp_block_name, tp));
return (rv);
}
/*
* tcp_unlock_or_drop()
* Handle return code from tfb_tcp_output() after we have logged/traced,
* to be used with tcp_output_nodrop().
*/
static inline int
tcp_unlock_or_drop(struct tcpcb *tp, int tcp_output_retval)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
if (tcp_output_retval < 0) {
tcp_output_retval = -tcp_output_retval;
if (tcp_drop(tp, tcp_output_retval) != NULL)
INP_WUNLOCK(tp->t_inpcb);
} else
INP_WUNLOCK(tp->t_inpcb);
return (tcp_output_retval);
}
#endif /* _NETINET_IN_PCB_H_ */
#endif /* _KERNEL */
/*
@ -979,8 +1075,6 @@ void tcp_twclose(struct tcptw *, int);
void tcp_ctlinput(int, struct sockaddr *, void *);
int tcp_ctloutput(struct socket *, struct sockopt *);
void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *);
struct tcpcb *
tcp_drop(struct tcpcb *, int);
void tcp_drain(void);
void tcp_init(void);
void tcp_fini(void *);

View File

@ -532,7 +532,8 @@ toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
(void) tcp_output(tp);
if (tcp_output(tp) < 0)
INP_WLOCK(inp); /* re-acquire */
} else {
tp = tcp_drop(tp, err);
if (tp == NULL)