Rework IPX/SPX socket and pcb reference model:

- Introduce invariant that all IPX/SPX sockets will have valid so_pcb
  pointers to ipxpcb structures, and that for SPX, the control block
  pointer will always be valid.  Don't attempt to free the socket or
  pcb at various odd points, such as disconnect.

- Add a new ipxpcb flag, IPXP_DROPPED, which will be set in place of
  freeing PCBs so that this invariant can be maintained.  This flag
  is now checked instead of a NULL check in various socket protocol
  calls.

- Introduce many assertions that this invariant holds.

- Various pieces of code, such as the SPX timer code, no longer need
  to jump through hoops in case they free a PCB while running.

- Break out ipx_pcbfree() from ipx_pcbdetach().  Likewise, break out
  spx_pcbdetach() from spx_close().

- Comment on some SMP-related limitations to the SPX code.

- Update copyrights.

MFC after:	1 month
This commit is contained in:
rwatson 2006-03-25 17:28:42 +00:00
parent f594b5f37b
commit 1cfd88a697
4 changed files with 146 additions and 92 deletions

View File

@ -274,8 +274,6 @@ ipx_pcbdisconnect(ipxp)
IPX_LOCK_ASSERT(ipxp);
ipxp->ipxp_faddr = zeroipx_addr;
if (ipxp->ipxp_socket->so_state & SS_NOFDREF)
ipx_pcbdetach(ipxp);
}
void
@ -287,10 +285,20 @@ ipx_pcbdetach(ipxp)
IPX_LIST_LOCK_ASSERT();
IPX_LOCK_ASSERT(ipxp);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
ipxp->ipxp_socket = NULL;
}
void
ipx_pcbfree(ipxp)
struct ipxpcb *ipxp;
{
KASSERT(ipxp->ipxp_socket == NULL,
("ipx_pcbfree: ipxp_socket != NULL"));
IPX_LIST_LOCK_ASSERT();
IPX_LOCK_ASSERT(ipxp);
if (ipxp->ipxp_route.ro_rt != NULL)
RTFREE(ipxp->ipxp_route.ro_rt);
LIST_REMOVE(ipxp, ipxp_list);

View File

@ -2,7 +2,7 @@
* Copyright (c) 1984, 1985, 1986, 1987, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (c) 1995, Mike Mitchell
* Copyright (c) 2004-2005 Robert N. M. Watson
* Copyright (c) 2004-2006 Robert N. M. Watson
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -68,13 +68,15 @@ extern struct ipxpcbhead ipxrawpcb_list;
extern struct mtx ipxpcb_list_mtx;
#endif
/* possible flags */
#define IPXP_IN_ABORT 0x1 /* calling abort through socket */
#define IPXP_RAWIN 0x2 /* show headers on input */
#define IPXP_RAWOUT 0x4 /* show header on output */
#define IPXP_ALL_PACKETS 0x8 /* Turn off higher proto processing */
#define IPXP_CHECKSUM 0x10 /* use checksum on this socket */
/*
* IPX/SPX PCB flags.
*/
#define IPXP_IN_ABORT 0x1 /* Calling abort through socket. */
#define IPXP_RAWIN 0x2 /* Show headers on input. */
#define IPXP_RAWOUT 0x4 /* Show header on output. */
#define IPXP_ALL_PACKETS 0x8 /* Turn off higher proto processing. */
#define IPXP_CHECKSUM 0x10 /* Use checksum on this socket. */
#define IPXP_DROPPED 0x20 /* Connection dropped. */
#define IPX_WILDCARD 1
@ -98,6 +100,7 @@ int ipx_pcbconnect(struct ipxpcb *ipxp, struct sockaddr *nam,
struct thread *p);
void ipx_pcbdetach(struct ipxpcb *ipxp);
void ipx_pcbdisconnect(struct ipxpcb *ipxp);
void ipx_pcbfree(struct ipxpcb *ipxp);
struct ipxpcb *
ipx_pcblookup(struct ipx_addr *faddr, int lport, int wildp);
void ipx_setpeeraddr(struct ipxpcb *ipxp, struct sockaddr **nam);

View File

@ -1,8 +1,9 @@
/*-
* Copyright (c) 2004-2005 Robert N. M. Watson
* Copyright (c) 1995, Mike Mitchell
* Copyright (c) 1984, 1985, 1986, 1987, 1993
* The Regents of the University of California. All rights reserved.
* The Regents of the University of California.
* Copyright (c) 1995, Mike Mitchell
* Copyright (c) 2004-2006 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -327,9 +328,8 @@ ipx_ctloutput(so, sopt)
struct ipx ioptval;
long seq;
KASSERT(ipxp != NULL, ("ipx_ctloutput: ipxp == NULL"));
error = 0;
if (ipxp == NULL)
return (EINVAL);
switch (sopt->sopt_dir) {
case SOPT_GET:
@ -434,9 +434,11 @@ ipx_usr_abort(so)
{
struct ipxpcb *ipxp = sotoipxpcb(so);
KASSERT(ipxp != NULL, ("ipx_usr_abort: ipxp == NULL"));
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
ipx_pcbdetach(ipxp);
ipx_pcbfree(ipxp);
IPX_LIST_UNLOCK();
soisdisconnected(so);
ACCEPT_LOCK();
@ -454,13 +456,13 @@ ipx_attach(so, proto, td)
struct ipxpcb *ipxp = sotoipxpcb(so);
int error;
if (ipxp != NULL)
return (EINVAL);
KASSERT(ipxp == NULL, ("ipx_attach: ipxp != NULL"));
error = soreserve(so, ipxsendspace, ipxrecvspace);
if (error != 0)
return (error);
IPX_LIST_LOCK();
error = ipx_pcballoc(so, &ipxpcb_list, td);
IPX_LIST_UNLOCK();
if (error == 0)
error = soreserve(so, ipxsendspace, ipxrecvspace);
return (error);
}
@ -473,6 +475,7 @@ ipx_bind(so, nam, td)
struct ipxpcb *ipxp = sotoipxpcb(so);
int error;
KASSERT(ipxp != NULL, ("ipx_bind: ipxp == NULL"));
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
error = ipx_pcbbind(ipxp, nam, td);
@ -490,6 +493,7 @@ ipx_connect(so, nam, td)
struct ipxpcb *ipxp = sotoipxpcb(so);
int error;
KASSERT(ipxp != NULL, ("ipx_connect: ipxp == NULL"));
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
if (!ipx_nullhost(ipxp->ipxp_faddr)) {
@ -511,11 +515,11 @@ ipx_detach(so)
{
struct ipxpcb *ipxp = sotoipxpcb(so);
if (ipxp == NULL)
return (ENOTCONN);
KASSERT(ipxp != NULL, ("ipx_detach: ipxp == NULL"));
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
ipx_pcbdetach(ipxp);
ipx_pcbfree(ipxp);
IPX_LIST_UNLOCK();
return (0);
}
@ -527,6 +531,7 @@ ipx_disconnect(so)
struct ipxpcb *ipxp = sotoipxpcb(so);
int error;
KASSERT(ipxp != NULL, ("ipx_disconnect: ipxp == NULL"));
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
error = 0;
@ -549,6 +554,7 @@ ipx_peeraddr(so, nam)
{
struct ipxpcb *ipxp = sotoipxpcb(so);
KASSERT(ipxp != NULL, ("ipx_peeraddr: ipxp == NULL"));
ipx_setpeeraddr(ipxp, nam);
return (0);
}
@ -566,6 +572,7 @@ ipx_send(so, flags, m, nam, control, td)
struct ipxpcb *ipxp = sotoipxpcb(so);
struct ipx_addr laddr;
KASSERT(ipxp != NULL, ("ipxp_send: ipxp == NULL"));
/*
* Attempt to only acquire the necessary locks: if the socket is
* already connected, we don't need to hold the IPX list lock to be
@ -619,6 +626,8 @@ static int
ipx_shutdown(so)
struct socket *so;
{
KASSERT(so->so_pcb != NULL, ("ipx_shutdown: so_pcb == NULL"));
socantsendmore(so);
return (0);
}
@ -630,6 +639,7 @@ ipx_sockaddr(so, nam)
{
struct ipxpcb *ipxp = sotoipxpcb(so);
KASSERT(ipxp != NULL, ("ipx_sockaddr: ipxp == NULL"));
ipx_setsockaddr(ipxp, nam);
return (0);
}
@ -643,6 +653,7 @@ ripx_attach(so, proto, td)
int error = 0;
struct ipxpcb *ipxp = sotoipxpcb(so);
KASSERT(ipxp == NULL, ("ripx_attach: ipxp != NULL"));
if (td != NULL && (error = suser(td)) != 0)
return (error);
/*

View File

@ -94,7 +94,7 @@ static int spx_output(struct spxpcb *cb, struct mbuf *m0);
static int spx_reass(struct spxpcb *cb, struct spx *si);
static void spx_setpersist(struct spxpcb *cb);
static void spx_template(struct spxpcb *cb);
static struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
static void spx_timers(struct spxpcb *cb, int timer);
static void spx_usrclosed(struct spxpcb *cb);
static int spx_usr_abort(struct socket *so);
@ -104,6 +104,7 @@ static int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
static int spx_connect(struct socket *so, struct sockaddr *nam,
struct thread *td);
static int spx_detach(struct socket *so);
static void spx_pcbdetach(struct ipxpcb *ipxp);
static int spx_usr_disconnect(struct socket *so);
static int spx_listen(struct socket *so, int backlog, struct thread *td);
static int spx_rcvd(struct socket *so, int flags);
@ -181,8 +182,10 @@ spx_input(struct mbuf *m, struct ipxpcb *ipxp)
IPX_LOCK_ASSERT(ipxp);
cb = ipxtospxpcb(ipxp);
if (cb == NULL)
goto bad;
KASSERT(cb != NULL, ("spx_input: cb == NULL"));
if (ipxp->ipxp_flags & IPXP_DROPPED)
goto drop;
if (m->m_len < sizeof(*si)) {
if ((m = m_pullup(m, sizeof(*si))) == NULL) {
@ -220,6 +223,14 @@ spx_input(struct mbuf *m, struct ipxpcb *ipxp)
* here should be discarded. We mark the socket as
* discardable until we're committed to it below in
* TCPS_LISTEN.
*
* XXXRW: In the new world order of real kernel parallelism,
* temporarily allocating the socket when we're "not sure"
* seems like a bad idea, as we might race to remove it if
* the listen socket is closed...?
*
* We drop the lock of the listen socket ipxp, and acquire
* the lock of the new socket ipxp.
*/
dropsocket++;
IPX_UNLOCK(ipxp);
@ -355,6 +366,9 @@ spx_input(struct mbuf *m, struct ipxpcb *ipxp)
dropwithreset:
IPX_LOCK_ASSERT(ipxp);
if (cb == NULL && (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
traceallspxs))
spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
IPX_UNLOCK(ipxp);
if (dropsocket) {
struct socket *head;
@ -368,20 +382,14 @@ spx_input(struct mbuf *m, struct ipxpcb *ipxp)
so->so_head = NULL;
ACCEPT_UNLOCK();
soabort(so);
cb = NULL;
}
IPX_LIST_UNLOCK();
m_freem(dtom(si));
if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
traceallspxs)
spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
return;
drop:
bad:
IPX_LOCK_ASSERT(ipxp);
if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
traceallspxs)
if (cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG || traceallspxs)
spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
IPX_UNLOCK(ipxp);
IPX_LIST_UNLOCK();
@ -584,9 +592,8 @@ spx_reass(struct spxpcb *cb, struct spx *si)
* If this is a system packet, we don't need to queue it up, and
* won't update acknowledge #.
*/
if (si->si_cc & SPX_SP) {
if (si->si_cc & SPX_SP)
return (1);
}
/*
* We have already seen this packet, so drop.
@ -684,9 +691,9 @@ spx_reass(struct spxpcb *cb, struct spx *si)
sbappend_locked(&so->so_rcv, m);
} else
#endif
if (packetp) {
if (packetp)
sbappendrecord_locked(&so->so_rcv, m);
} else {
else {
cb->s_rhdr = *mtod(m, struct spxhdr *);
m->m_data += SPINC;
m->m_len -= SPINC;
@ -947,9 +954,9 @@ spx_output(struct spxpcb *cb, struct mbuf *m0)
* nonzero, transmit what we can, otherwise send a probe.
*/
if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
cb->s_timer[SPXT_PERSIST] == 0) {
cb->s_rxtshift = 0;
spx_setpersist(cb);
cb->s_timer[SPXT_PERSIST] == 0) {
cb->s_rxtshift = 0;
spx_setpersist(cb);
}
/*
@ -1150,6 +1157,9 @@ spx_ctloutput(struct socket *so, struct sockopt *sopt)
u_short usoptval;
int optval;
ipxp = sotoipxpcb(so);
KASSERT(ipxp != NULL, ("spx_ctloutput: ipxp == NULL"));
/*
* This will have to be changed when we do more general stacking of
* protocols.
@ -1157,9 +1167,11 @@ spx_ctloutput(struct socket *so, struct sockopt *sopt)
if (sopt->sopt_level != IPXPROTO_SPX)
return (ipx_ctloutput(so, sopt));
ipxp = sotoipxpcb(so);
if (ipxp == NULL)
return (EINVAL);
IPX_LOCK(ipxp);
if (ipxp->ipxp_flags & IPXP_DROPPED) {
IPX_UNLOCK(ipxp);
return (ECONNRESET);
}
IPX_LOCK(ipxp);
cb = ipxtospxpcb(ipxp);
@ -1211,6 +1223,9 @@ spx_ctloutput(struct socket *so, struct sockopt *sopt)
/*
* XXX Why are these shorts on get and ints on set? That
* doesn't make any sense...
*
* XXXRW: Note, when we re-acquire the ipxp lock, we should
* re-check that it's not dropped.
*/
IPX_UNLOCK(ipxp);
switch (sopt->sopt_name) {
@ -1304,7 +1319,13 @@ spx_usr_abort(struct socket *so)
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
spx_drop(cb, ECONNABORTED);
spx_pcbdetach(ipxp);
ipx_pcbdetach(ipxp);
ipx_pcbfree(ipxp);
IPX_LIST_UNLOCK();
ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return (0);
}
@ -1391,7 +1412,29 @@ spx_attach(struct socket *so, int proto, struct thread *td)
SPXTV_MIN, SPXTV_REXMTMAX);
ipxp->ipxp_pcb = (caddr_t)cb;
IPX_LIST_UNLOCK();
return (error);
return (0);
}
/*
 * Release the SPX control block hanging off an ipxpcb: free every packet
 * still queued on it, free the mbuf holding the header template
 * (cb->s_ipx), free the control block itself, and clear ipxp->ipxp_pcb.
 *
 * The caller must hold the ipxp lock (asserted below), and ipxp must
 * still have a control block attached.
 */
static void
spx_pcbdetach(struct ipxpcb *ipxp)
{
	struct spxpcb *cb;
	struct spx_q *s;
	struct mbuf *m;

	IPX_LOCK_ASSERT(ipxp);

	cb = ipxtospxpcb(ipxp);
	KASSERT(cb != NULL, ("spx_pcbdetach: cb == NULL"));

	/*
	 * The packet queue is a circular list anchored at cb->s_q, so it is
	 * empty when the head points back at itself, not when it is NULL.
	 * Always pull the first entry; testing against NULL would go on to
	 * unlink and "free" the head embedded in cb once the queue drained.
	 */
	while ((s = cb->s_q.si_next) != &cb->s_q) {
		remque(s);
		m = dtom(s);
		m_freem(m);
	}
	m_free(dtom(cb->s_ipx));
	FREE(cb, M_PCB);
	ipxp->ipxp_pcb = NULL;
}
static int
@ -1478,6 +1521,9 @@ spx_detach(struct socket *so)
spx_disconnect(cb);
else
spx_close(cb);
spx_pcbdetach(ipxp);
ipx_pcbdetach(ipxp);
ipx_pcbfree(ipxp);
IPX_LIST_UNLOCK();
return (0);
}
@ -1501,6 +1547,7 @@ spx_usr_disconnect(struct socket *so)
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
spx_disconnect(cb);
IPX_UNLOCK(ipxp);
IPX_LIST_UNLOCK();
return (0);
}
@ -1570,16 +1617,18 @@ spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
cb = ipxtospxpcb(ipxp);
KASSERT(cb != NULL, ("spx_rcvoob: cb == NULL"));
IPX_LOCK(ipxp);
SOCKBUF_LOCK(&so->so_rcv);
if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
(so->so_rcv.sb_state & SBS_RCVATMARK)) {
SOCKBUF_UNLOCK(&so->so_rcv);
m->m_len = 1;
/* Unlocked read. */
*mtod(m, caddr_t) = cb->s_iobc;
IPX_UNLOCK(ipxp);
return (0);
}
SOCKBUF_UNLOCK(&so->so_rcv);
IPX_UNLOCK(ipxp);
return (EINVAL);
}
@ -1643,6 +1692,7 @@ spx_shutdown(struct socket *so)
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
spx_usrclosed(cb);
IPX_UNLOCK(ipxp);
IPX_LIST_UNLOCK();
return (0);
}
@ -1704,36 +1754,22 @@ spx_template(struct spxpcb *cb)
}
/*
* Close a SPIP control block:
* discard spx control block itself
* discard ipx protocol control block
* wake up any sleepers
* cb will always be invalid after this call.
* Close a SPIP control block. Wake up any sleepers. We used to free any
* queued packets and cb->s_ipx here, but now we defer that until the pcb is
* discarded.
*/
void
spx_close(struct spxpcb *cb)
{
struct spx_q *s;
struct ipxpcb *ipxp = cb->s_ipxpcb;
struct socket *so = ipxp->ipxp_socket;
struct mbuf *m;
KASSERT(ipxp != NULL, ("spx_close: ipxp == NULL"));
IPX_LIST_LOCK_ASSERT();
IPX_LOCK_ASSERT(ipxp);
s = cb->s_q.si_next;
while (s != &(cb->s_q)) {
s = s->si_next;
m = dtom(s->si_prev);
remque(s->si_prev);
m_freem(m);
}
m_free(dtom(cb->s_ipx));
FREE(cb, M_PCB);
ipxp->ipxp_pcb = NULL;
ipxp->ipxp_flags |= IPXP_DROPPED;
soisdisconnected(so);
ipx_pcbdetach(ipxp);
spxstat.spxs_closed++;
}
@ -1803,12 +1839,14 @@ spx_fasttimo(void)
IPX_LIST_LOCK();
LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
IPX_LOCK(ipxp);
if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
(cb->s_flags & SF_DELACK)) {
cb->s_flags &= ~SF_DELACK;
cb->s_flags |= SF_ACKNOW;
spxstat.spxs_delack++;
spx_output(cb, NULL);
if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
cb = ipxtospxpcb(ipxp);
if (cb->s_flags & SF_DELACK) {
cb->s_flags &= ~SF_DELACK;
cb->s_flags |= SF_ACKNOW;
spxstat.spxs_delack++;
spx_output(cb, NULL);
}
}
IPX_UNLOCK(ipxp);
}
@ -1822,40 +1860,37 @@ spx_fasttimo(void)
void
spx_slowtimo(void)
{
struct ipxpcb *ip, *ip_temp;
struct ipxpcb *ipxp;
struct spxpcb *cb;
int i;
/*
* Search through tcb's and update active timers. Note that timers
* may free the ipxpcb, so be sure to handle that case.
*
* spx_timers() may remove an ipxpcb entry, so we have to be ready to
* continue despite that. The logic here is a bit obfuscated.
* Search through tcb's and update active timers. Once, timers could
* free ipxp's, but now we do that only when detaching a socket.
*/
IPX_LIST_LOCK();
LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
cb = ipxtospxpcb(ip);
if (cb == NULL)
LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
IPX_LOCK(ipxp);
if (ipxp->ipxp_flags & IPXP_DROPPED) {
IPX_UNLOCK(ipxp);
continue;
IPX_LOCK(cb->s_ipxpcb);
}
cb = (struct spxpcb *)ipxp->ipxp_pcb;
KASSERT(cb != NULL, ("spx_slowtimo: cb == NULL"));
for (i = 0; i < SPXT_NTIMERS; i++) {
if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
/*
* spx_timers() returns (NULL) if it free'd
* the pcb.
*/
cb = spx_timers(cb, i);
if (cb == NULL)
spx_timers(cb, i);
if (ipxp->ipxp_flags & IPXP_DROPPED)
break;
}
}
if (cb != NULL) {
if (!(ipxp->ipxp_flags & IPXP_DROPPED)) {
cb->s_idle++;
if (cb->s_rtt)
cb->s_rtt++;
IPX_UNLOCK(cb->s_ipxpcb);
}
IPX_UNLOCK(ipxp);
}
IPX_LIST_UNLOCK();
SPX_LOCK();
@ -1866,7 +1901,7 @@ spx_slowtimo(void)
/*
* SPX timer processing.
*/
static struct spxpcb *
static void
spx_timers(struct spxpcb *cb, int timer)
{
long rexmt;
@ -1896,7 +1931,6 @@ spx_timers(struct spxpcb *cb, int timer)
cb->s_rxtshift = SPX_MAXRXTSHIFT;
spxstat.spxs_timeoutdrop++;
spx_drop(cb, ETIMEDOUT);
cb = NULL;
break;
}
spxstat.spxs_rexmttimeo++;
@ -1965,11 +1999,9 @@ spx_timers(struct spxpcb *cb, int timer)
dropit:
spxstat.spxs_keepdrops++;
spx_drop(cb, ETIMEDOUT);
cb = NULL;
break;
default:
panic("spx_timers: unknown timer %d", timer);
}
return (cb);
}