Fix an old and well-documented use-after-free race condition in
TCP timers: - Add a reference from tcpcb to its inpcb - Defer tcpcb deletion until TCP timers have finished Differential Revision: https://reviews.freebsd.org/D2079 Submitted by: jch, Marc De La Gueronniere <mdelagueronniere@verisign.com> Reviewed by: imp, rrs, adrian, jhb, bz Approved by: jhb Sponsored by: Verisign, Inc.
This commit is contained in:
parent
c3358f4ed2
commit
5571f9cf81
@ -230,6 +230,7 @@ static struct inpcb *tcp_notify(struct inpcb *, int);
|
||||
static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
|
||||
static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
|
||||
void *ip4hdr, const void *ip6hdr);
|
||||
static void tcp_timer_discard(struct tcpcb *, uint32_t);
|
||||
|
||||
/*
|
||||
* Target size of TCP PCB hash tables. Must be a power of two.
|
||||
@ -801,7 +802,13 @@ tcp_newtcpcb(struct inpcb *inp)
|
||||
if (V_tcp_do_sack)
|
||||
tp->t_flags |= TF_SACK_PERMIT;
|
||||
TAILQ_INIT(&tp->snd_holes);
|
||||
tp->t_inpcb = inp; /* XXX */
|
||||
/*
|
||||
* The tcpcb will hold a reference on its inpcb until tcp_discardcb()
|
||||
* is called.
|
||||
*/
|
||||
in_pcbref(inp); /* Reference for tcpcb */
|
||||
tp->t_inpcb = inp;
|
||||
|
||||
/*
|
||||
* Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
|
||||
* rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
|
||||
@ -920,6 +927,7 @@ tcp_discardcb(struct tcpcb *tp)
|
||||
#ifdef INET6
|
||||
int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
|
||||
#endif /* INET6 */
|
||||
int released;
|
||||
|
||||
INP_WLOCK_ASSERT(inp);
|
||||
|
||||
@ -927,22 +935,15 @@ tcp_discardcb(struct tcpcb *tp)
|
||||
* Make sure that all of our timers are stopped before we delete the
|
||||
* PCB.
|
||||
*
|
||||
* XXXRW: Really, we would like to use callout_drain() here in order
|
||||
* to avoid races experienced in tcp_timer.c where a timer is already
|
||||
* executing at this point. However, we can't, both because we're
|
||||
* running in a context where we can't sleep, and also because we
|
||||
* hold locks required by the timers. What we instead need to do is
|
||||
* test to see if callout_drain() is required, and if so, defer some
|
||||
* portion of the remainder of tcp_discardcb() to an asynchronous
|
||||
* context that can callout_drain() and then continue. Some care
|
||||
* will be required to ensure that no further processing takes place
|
||||
* on the tcpcb, even though it hasn't been freed (a flag?).
|
||||
* If stopping a timer fails, we schedule a discard function in same
|
||||
* callout, and the last discard function called will take care of
|
||||
* deleting the tcpcb.
|
||||
*/
|
||||
callout_stop(&tp->t_timers->tt_rexmt);
|
||||
callout_stop(&tp->t_timers->tt_persist);
|
||||
callout_stop(&tp->t_timers->tt_keep);
|
||||
callout_stop(&tp->t_timers->tt_2msl);
|
||||
callout_stop(&tp->t_timers->tt_delack);
|
||||
tcp_timer_stop(tp, TT_REXMT);
|
||||
tcp_timer_stop(tp, TT_PERSIST);
|
||||
tcp_timer_stop(tp, TT_KEEP);
|
||||
tcp_timer_stop(tp, TT_2MSL);
|
||||
tcp_timer_stop(tp, TT_DELACK);
|
||||
|
||||
/*
|
||||
* If we got enough samples through the srtt filter,
|
||||
@ -1019,8 +1020,80 @@ tcp_discardcb(struct tcpcb *tp)
|
||||
|
||||
CC_ALGO(tp) = NULL;
|
||||
inp->inp_ppcb = NULL;
|
||||
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
|
||||
/* We own the last reference on tcpcb, let's free it. */
|
||||
tp->t_inpcb = NULL;
|
||||
uma_zfree(V_tcpcb_zone, tp);
|
||||
released = in_pcbrele_wlocked(inp);
|
||||
KASSERT(!released, ("%s: inp %p should not have been released "
|
||||
"here", __func__, inp));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_2msl_discard(void *xtp)
|
||||
{
|
||||
|
||||
tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_keep_discard(void *xtp)
|
||||
{
|
||||
|
||||
tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_persist_discard(void *xtp)
|
||||
{
|
||||
|
||||
tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_rexmt_discard(void *xtp)
|
||||
{
|
||||
|
||||
tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_delack_discard(void *xtp)
|
||||
{
|
||||
|
||||
tcp_timer_discard((struct tcpcb *)xtp, TT_DELACK);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
|
||||
{
|
||||
struct inpcb *inp;
|
||||
|
||||
CURVNET_SET(tp->t_vnet);
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
inp = tp->t_inpcb;
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
|
||||
__func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
|
||||
("%s: tcpcb has to be stopped here", __func__));
|
||||
KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
|
||||
("%s: discard callout should be running", __func__));
|
||||
tp->t_timers->tt_flags &= ~timer_type;
|
||||
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
|
||||
/* We own the last reference on this tcpcb, let's free it. */
|
||||
tp->t_inpcb = NULL;
|
||||
uma_zfree(V_tcpcb_zone, tp);
|
||||
if (in_pcbrele_wlocked(inp)) {
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
}
|
||||
INP_WUNLOCK(inp);
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -258,10 +258,6 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
|
||||
|
||||
static int tcp_totbackoff = 2559; /* sum of tcp_backoff[] */
|
||||
|
||||
static int tcp_timer_race;
|
||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
|
||||
0, "Count of t_inpcb races on tcp_discardcb");
|
||||
|
||||
/*
|
||||
* TCP timer processing.
|
||||
*/
|
||||
@ -274,18 +270,7 @@ tcp_timer_delack(void *xtp)
|
||||
CURVNET_SET(tp->t_vnet);
|
||||
|
||||
inp = tp->t_inpcb;
|
||||
/*
|
||||
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
||||
* tear-down mean we need it as a work-around for races between
|
||||
* timers and tcp_discardcb().
|
||||
*
|
||||
* KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
tcp_timer_race++;
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
if (callout_pending(&tp->t_timers->tt_delack) ||
|
||||
!callout_active(&tp->t_timers->tt_delack)) {
|
||||
@ -299,6 +284,10 @@ tcp_timer_delack(void *xtp)
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
|
||||
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
|
||||
KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
|
||||
("%s: tp %p delack callout should be running", __func__, tp));
|
||||
|
||||
tp->t_flags |= TF_ACKNOW;
|
||||
TCPSTAT_INC(tcps_delack);
|
||||
@ -318,24 +307,9 @@ tcp_timer_2msl(void *xtp)
|
||||
|
||||
ostate = tp->t_state;
|
||||
#endif
|
||||
/*
|
||||
* XXXRW: Does this actually happen?
|
||||
*/
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
inp = tp->t_inpcb;
|
||||
/*
|
||||
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
||||
* tear-down mean we need it as a work-around for races between
|
||||
* timers and tcp_discardcb().
|
||||
*
|
||||
* KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
tcp_timer_race++;
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
tcp_free_sackholes(tp);
|
||||
if (callout_pending(&tp->t_timers->tt_2msl) ||
|
||||
@ -352,6 +326,10 @@ tcp_timer_2msl(void *xtp)
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
|
||||
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
|
||||
KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
|
||||
("%s: tp %p 2msl callout should be running", __func__, tp));
|
||||
/*
|
||||
* 2 MSL timeout in shutdown went off. If we're closed but
|
||||
* still waiting for peer to close and connection has been idle
|
||||
@ -402,19 +380,7 @@ tcp_timer_keep(void *xtp)
|
||||
#endif
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
inp = tp->t_inpcb;
|
||||
/*
|
||||
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
||||
* tear-down mean we need it as a work-around for races between
|
||||
* timers and tcp_discardcb().
|
||||
*
|
||||
* KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
tcp_timer_race++;
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
if (callout_pending(&tp->t_timers->tt_keep) ||
|
||||
!callout_active(&tp->t_timers->tt_keep)) {
|
||||
@ -430,6 +396,10 @@ tcp_timer_keep(void *xtp)
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
|
||||
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
|
||||
KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
|
||||
("%s: tp %p keep callout should be running", __func__, tp));
|
||||
/*
|
||||
* Keep-alive timer went off; send something
|
||||
* or drop connection if idle for too long.
|
||||
@ -505,19 +475,7 @@ tcp_timer_persist(void *xtp)
|
||||
#endif
|
||||
INP_INFO_WLOCK(&V_tcbinfo);
|
||||
inp = tp->t_inpcb;
|
||||
/*
|
||||
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
||||
* tear-down mean we need it as a work-around for races between
|
||||
* timers and tcp_discardcb().
|
||||
*
|
||||
* KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
tcp_timer_race++;
|
||||
INP_INFO_WUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
if (callout_pending(&tp->t_timers->tt_persist) ||
|
||||
!callout_active(&tp->t_timers->tt_persist)) {
|
||||
@ -533,6 +491,10 @@ tcp_timer_persist(void *xtp)
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
|
||||
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
|
||||
KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
|
||||
("%s: tp %p persist callout should be running", __func__, tp));
|
||||
/*
|
||||
* Persistance timer into zero window.
|
||||
* Force a byte to be output, if possible.
|
||||
@ -594,19 +556,7 @@ tcp_timer_rexmt(void * xtp)
|
||||
|
||||
INP_INFO_RLOCK(&V_tcbinfo);
|
||||
inp = tp->t_inpcb;
|
||||
/*
|
||||
* XXXRW: While this assert is in fact correct, bugs in the tcpcb
|
||||
* tear-down mean we need it as a work-around for races between
|
||||
* timers and tcp_discardcb().
|
||||
*
|
||||
* KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
tcp_timer_race++;
|
||||
INP_INFO_RUNLOCK(&V_tcbinfo);
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
|
||||
INP_WLOCK(inp);
|
||||
if (callout_pending(&tp->t_timers->tt_rexmt) ||
|
||||
!callout_active(&tp->t_timers->tt_rexmt)) {
|
||||
@ -622,6 +572,10 @@ tcp_timer_rexmt(void * xtp)
|
||||
CURVNET_RESTORE();
|
||||
return;
|
||||
}
|
||||
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
|
||||
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
|
||||
KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
|
||||
("%s: tp %p rexmt callout should be running", __func__, tp));
|
||||
tcp_free_sackholes(tp);
|
||||
/*
|
||||
* Retransmission timer went off. Message has not
|
||||
@ -850,7 +804,7 @@ tcp_timer_rexmt(void * xtp)
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
|
||||
tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
|
||||
{
|
||||
struct callout *t_callout;
|
||||
timeout_t *f_callout;
|
||||
@ -862,6 +816,9 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (tp->t_timers->tt_flags & TT_STOPPED)
|
||||
return;
|
||||
|
||||
switch (timer_type) {
|
||||
case TT_DELACK:
|
||||
t_callout = &tp->t_timers->tt_delack;
|
||||
@ -887,14 +844,23 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
|
||||
panic("tp %p bad timer_type %#x", tp, timer_type);
|
||||
}
|
||||
if (delta == 0) {
|
||||
callout_stop(t_callout);
|
||||
if ((tp->t_timers->tt_flags & timer_type) &&
|
||||
callout_stop(t_callout)) {
|
||||
tp->t_timers->tt_flags &= ~timer_type;
|
||||
}
|
||||
} else {
|
||||
if ((tp->t_timers->tt_flags & timer_type) == 0) {
|
||||
tp->t_timers->tt_flags |= timer_type;
|
||||
callout_reset_on(t_callout, delta, f_callout, tp, cpu);
|
||||
} else {
|
||||
/* Reset already running callout on the same CPU. */
|
||||
callout_reset(t_callout, delta, f_callout, tp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
tcp_timer_active(struct tcpcb *tp, int timer_type)
|
||||
tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
|
||||
{
|
||||
struct callout *t_callout;
|
||||
|
||||
@ -920,6 +886,58 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
|
||||
return callout_active(t_callout);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
|
||||
{
|
||||
struct callout *t_callout;
|
||||
timeout_t *f_callout;
|
||||
|
||||
tp->t_timers->tt_flags |= TT_STOPPED;
|
||||
|
||||
switch (timer_type) {
|
||||
case TT_DELACK:
|
||||
t_callout = &tp->t_timers->tt_delack;
|
||||
f_callout = tcp_timer_delack_discard;
|
||||
break;
|
||||
case TT_REXMT:
|
||||
t_callout = &tp->t_timers->tt_rexmt;
|
||||
f_callout = tcp_timer_rexmt_discard;
|
||||
break;
|
||||
case TT_PERSIST:
|
||||
t_callout = &tp->t_timers->tt_persist;
|
||||
f_callout = tcp_timer_persist_discard;
|
||||
break;
|
||||
case TT_KEEP:
|
||||
t_callout = &tp->t_timers->tt_keep;
|
||||
f_callout = tcp_timer_keep_discard;
|
||||
break;
|
||||
case TT_2MSL:
|
||||
t_callout = &tp->t_timers->tt_2msl;
|
||||
f_callout = tcp_timer_2msl_discard;
|
||||
break;
|
||||
default:
|
||||
panic("tp %p bad timer_type %#x", tp, timer_type);
|
||||
}
|
||||
|
||||
if (tp->t_timers->tt_flags & timer_type) {
|
||||
if (callout_stop(t_callout)) {
|
||||
tp->t_timers->tt_flags &= ~timer_type;
|
||||
} else {
|
||||
/*
|
||||
* Can't stop the callout, defer tcpcb actual deletion
|
||||
* to the last tcp timer discard callout.
|
||||
* The TT_STOPPED flag will ensure that no tcp timer
|
||||
* callouts can be restarted on our behalf, and
|
||||
* past this point currently running callouts waiting
|
||||
* on inp lock will return right away after the
|
||||
* classical check for callout reset/stop events:
|
||||
* callout_pending() || !callout_active()
|
||||
*/
|
||||
callout_reset(t_callout, 1, f_callout, tp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define ticks_to_msecs(t) (1000*(t) / hz)
|
||||
|
||||
void
|
||||
|
@ -146,12 +146,21 @@ struct tcp_timer {
|
||||
struct callout tt_keep; /* keepalive */
|
||||
struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
|
||||
struct callout tt_delack; /* delayed ACK timer */
|
||||
uint32_t tt_flags; /* Timers flags */
|
||||
uint32_t tt_spare; /* TDB */
|
||||
};
|
||||
#define TT_DELACK 0x01
|
||||
#define TT_REXMT 0x02
|
||||
#define TT_PERSIST 0x04
|
||||
#define TT_KEEP 0x08
|
||||
#define TT_2MSL 0x10
|
||||
|
||||
/*
|
||||
* Flags for the tt_flags field.
|
||||
*/
|
||||
#define TT_DELACK 0x0001
|
||||
#define TT_REXMT 0x0002
|
||||
#define TT_PERSIST 0x0004
|
||||
#define TT_KEEP 0x0008
|
||||
#define TT_2MSL 0x0010
|
||||
#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
|
||||
|
||||
#define TT_STOPPED 0x00010000
|
||||
|
||||
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
|
||||
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
|
||||
@ -183,6 +192,11 @@ void tcp_timer_keep(void *xtp);
|
||||
void tcp_timer_persist(void *xtp);
|
||||
void tcp_timer_rexmt(void *xtp);
|
||||
void tcp_timer_delack(void *xtp);
|
||||
void tcp_timer_2msl_discard(void *xtp);
|
||||
void tcp_timer_keep_discard(void *xtp);
|
||||
void tcp_timer_persist_discard(void *xtp);
|
||||
void tcp_timer_rexmt_discard(void *xtp);
|
||||
void tcp_timer_delack_discard(void *xtp);
|
||||
void tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
|
||||
struct xtcp_timer *xtimer);
|
||||
|
||||
|
@ -708,8 +708,9 @@ void tcp_slowtimo(void);
|
||||
struct tcptemp *
|
||||
tcpip_maketemplate(struct inpcb *);
|
||||
void tcpip_fillheaders(struct inpcb *, void *, void *);
|
||||
void tcp_timer_activate(struct tcpcb *, int, u_int);
|
||||
int tcp_timer_active(struct tcpcb *, int);
|
||||
void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
|
||||
int tcp_timer_active(struct tcpcb *, uint32_t);
|
||||
void tcp_timer_stop(struct tcpcb *, uint32_t);
|
||||
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
|
||||
/*
|
||||
* All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
|
||||
|
Loading…
Reference in New Issue
Block a user