Start to address a number of races relating to use of ifnet pointers
after the corresponding interface has been destroyed: (1) Add an ifnet refcount, ifp->if_refcount. Initialize it to 1 in if_alloc(), and modify if_free_type() to decrement and check the refcount. (2) Add new if_ref() and if_rele() interfaces to allow kernel code walking global interface lists to release IFNET_[RW]LOCK() yet keep the ifnet stable. Currently, if_rele() is a no-op wrapper around if_free(), but this may change in the future. (3) Add new ifnet field, if_alloctype, which caches the type passed to if_alloc(), but unlike if_type, won't be changed by drivers. This allows asynchronous free's of the interface after the driver has released it to still use the right type. Use that instead of the type passed to if_free_type(), but assert that they are the same (might have to rethink this if that doesn't work out). (4) Add a new ifnet_byindex_ref(), which looks up an interface by index and returns a reference rather than a pointer to it. (5) Fix if_alloc() to fully initialize the if_addr_mtx before hooking up the ifnet to global lists. (6) Modify sysctls in if_mib.c to use ifnet_byindex_ref() and release the ifnet when done. When this change is MFC'd, it will need to replace if_ispare fields rather than adding new fields in order to avoid breaking the binary interface. Once this change is MFC'd, if_free_type() should be removed, as its 'type' argument is now optional. This refcount is not appropriate for counting mbuf pkthdr references, and also not for counting entry into the device driver via ifnet function pointers. An rmlock may be appropriate for the latter. Rather, this is about ensuring data structure stability when reaching an ifnet via global ifnet lists and tables followed by copy in or out of userspace. MFC after: 3 weeks Reported by: mdtancsa Reviewed by: brooks
This commit is contained in:
parent
e72a94adc3
commit
27d37320ec
69
sys/net/if.c
69
sys/net/if.c
@ -52,6 +52,7 @@
|
||||
#include <sys/protosw.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/sockio.h>
|
||||
#include <sys/syslog.h>
|
||||
@ -204,10 +205,10 @@ static struct ifnet *
|
||||
ifnet_byindex_locked(u_short idx)
|
||||
{
|
||||
INIT_VNET_NET(curvnet);
|
||||
struct ifnet *ifp;
|
||||
|
||||
ifp = V_ifindex_table[idx].ife_ifnet;
|
||||
return (ifp);
|
||||
if (idx > V_if_index)
|
||||
return (NULL);
|
||||
return (V_ifindex_table[idx].ife_ifnet);
|
||||
}
|
||||
|
||||
struct ifnet *
|
||||
@ -221,6 +222,22 @@ ifnet_byindex(u_short idx)
|
||||
return (ifp);
|
||||
}
|
||||
|
||||
struct ifnet *
|
||||
ifnet_byindex_ref(u_short idx)
|
||||
{
|
||||
struct ifnet *ifp;
|
||||
|
||||
IFNET_RLOCK();
|
||||
ifp = ifnet_byindex_locked(idx);
|
||||
if (ifp == NULL) {
|
||||
IFNET_RUNLOCK();
|
||||
return (NULL);
|
||||
}
|
||||
if_ref(ifp);
|
||||
IFNET_RUNLOCK();
|
||||
return (ifp);
|
||||
}
|
||||
|
||||
static void
|
||||
ifnet_setbyindex(u_short idx, struct ifnet *ifp)
|
||||
{
|
||||
@ -490,6 +507,7 @@ if_alloc(u_char type)
|
||||
if_grow();
|
||||
|
||||
ifp->if_type = type;
|
||||
ifp->if_alloctype = type;
|
||||
|
||||
if (if_com_alloc[type] != NULL) {
|
||||
ifp->if_l2com = if_com_alloc[type](type, ifp);
|
||||
@ -498,11 +516,12 @@ if_alloc(u_char type)
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
IF_ADDR_LOCK_INIT(ifp);
|
||||
refcount_init(&ifp->if_refcount, 1); /* Index reference. */
|
||||
IFNET_WLOCK();
|
||||
ifnet_setbyindex(ifp->if_index, ifp);
|
||||
IFNET_WUNLOCK();
|
||||
IF_ADDR_LOCK_INIT(ifp);
|
||||
|
||||
return (ifp);
|
||||
}
|
||||
|
||||
@ -516,7 +535,7 @@ void
|
||||
if_free(struct ifnet *ifp)
|
||||
{
|
||||
|
||||
if_free_type(ifp, ifp->if_type);
|
||||
if_free_type(ifp, ifp->if_alloctype);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -529,27 +548,49 @@ if_free_type(struct ifnet *ifp, u_char type)
|
||||
{
|
||||
INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */
|
||||
|
||||
if (ifp != ifnet_byindex(ifp->if_index)) {
|
||||
if_printf(ifp, "%s: value was not if_alloced, skipping\n",
|
||||
__func__);
|
||||
/*
|
||||
* Some drivers modify if_type, so we can't rely on it being the
|
||||
* same in free as it was in alloc. Now that we have if_alloctype,
|
||||
* we should just use that, but drivers expect to pass a type.
|
||||
*/
|
||||
KASSERT(ifp->if_alloctype == type,
|
||||
("if_free_type: type (%d) != alloctype (%d)", type,
|
||||
ifp->if_alloctype));
|
||||
|
||||
if (!refcount_release(&ifp->if_refcount))
|
||||
return;
|
||||
}
|
||||
|
||||
IFNET_WLOCK();
|
||||
KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
|
||||
("%s: freeing unallocated ifnet", ifp->if_xname));
|
||||
ifnet_setbyindex(ifp->if_index, NULL);
|
||||
|
||||
/* XXX: should be locked with if_findindex() */
|
||||
while (V_if_index > 0 && ifnet_byindex_locked(V_if_index) == NULL)
|
||||
V_if_index--;
|
||||
IFNET_WUNLOCK();
|
||||
|
||||
if (if_com_free[type] != NULL)
|
||||
if_com_free[type](ifp->if_l2com, type);
|
||||
if (if_com_free[ifp->if_alloctype] != NULL)
|
||||
if_com_free[ifp->if_alloctype](ifp->if_l2com,
|
||||
ifp->if_alloctype);
|
||||
|
||||
IF_ADDR_LOCK_DESTROY(ifp);
|
||||
free(ifp, M_IFNET);
|
||||
}
|
||||
|
||||
void
|
||||
if_ref(struct ifnet *ifp)
|
||||
{
|
||||
|
||||
/* We don't assert the ifnet list lock here, but arguably should. */
|
||||
refcount_acquire(&ifp->if_refcount);
|
||||
}
|
||||
|
||||
void
|
||||
if_rele(struct ifnet *ifp)
|
||||
{
|
||||
|
||||
if_free(ifp);
|
||||
}
|
||||
|
||||
void
|
||||
ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
|
||||
{
|
||||
|
@ -88,16 +88,16 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
|
||||
|
||||
if (namelen != 2)
|
||||
return EINVAL;
|
||||
|
||||
if (name[0] <= 0 || name[0] > V_if_index ||
|
||||
ifnet_byindex(name[0]) == NULL)
|
||||
return ENOENT;
|
||||
|
||||
ifp = ifnet_byindex(name[0]);
|
||||
if (name[0] <= 0)
|
||||
return (ENOENT);
|
||||
ifp = ifnet_byindex_ref(name[0]);
|
||||
if (ifp == NULL)
|
||||
return (ENOENT);
|
||||
|
||||
switch(name[1]) {
|
||||
default:
|
||||
return ENOENT;
|
||||
error = ENOENT;
|
||||
goto out;
|
||||
|
||||
case IFDATA_GENERAL:
|
||||
bzero(&ifmd, sizeof(ifmd));
|
||||
@ -114,11 +114,11 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
|
||||
|
||||
error = SYSCTL_OUT(req, &ifmd, sizeof ifmd);
|
||||
if (error || !req->newptr)
|
||||
return error;
|
||||
goto out;
|
||||
|
||||
error = SYSCTL_IN(req, &ifmd, sizeof ifmd);
|
||||
if (error)
|
||||
return error;
|
||||
goto out;
|
||||
|
||||
#define DONTCOPY(fld) ifmd.ifmd_data.ifi_##fld = ifp->if_data.ifi_##fld
|
||||
DONTCOPY(type);
|
||||
@ -139,18 +139,20 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
|
||||
case IFDATA_LINKSPECIFIC:
|
||||
error = SYSCTL_OUT(req, ifp->if_linkmib, ifp->if_linkmiblen);
|
||||
if (error || !req->newptr)
|
||||
return error;
|
||||
goto out;
|
||||
|
||||
error = SYSCTL_IN(req, ifp->if_linkmib, ifp->if_linkmiblen);
|
||||
if (error)
|
||||
return error;
|
||||
goto out;
|
||||
break;
|
||||
|
||||
case IFDATA_DRIVERNAME:
|
||||
/* 20 is enough for 64bit ints */
|
||||
dlen = strlen(ifp->if_dname) + 20 + 1;
|
||||
if ((dbuf = malloc(dlen, M_TEMP, M_NOWAIT)) == NULL)
|
||||
return (ENOMEM);
|
||||
if ((dbuf = malloc(dlen, M_TEMP, M_NOWAIT)) == NULL) {
|
||||
error = ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (ifp->if_dunit == IF_DUNIT_NONE)
|
||||
strcpy(dbuf, ifp->if_dname);
|
||||
else
|
||||
@ -160,9 +162,11 @@ sysctl_ifdata(SYSCTL_HANDLER_ARGS) /* XXX bad syntax! */
|
||||
if (error == 0 && req->newptr != NULL)
|
||||
error = EPERM;
|
||||
free(dbuf, M_TEMP);
|
||||
return (error);
|
||||
goto out;
|
||||
}
|
||||
return 0;
|
||||
out:
|
||||
if_rele(ifp);
|
||||
return error;
|
||||
}
|
||||
|
||||
SYSCTL_NODE(_net_link_generic, IFMIB_IFDATA, ifdata, CTLFLAG_RW,
|
||||
|
@ -121,6 +121,7 @@ struct ifnet {
|
||||
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
|
||||
const char *if_dname; /* driver name */
|
||||
int if_dunit; /* unit or IF_DUNIT_NONE */
|
||||
u_int if_refcount; /* reference count */
|
||||
struct ifaddrhead if_addrhead; /* linked list of addresses per if */
|
||||
/*
|
||||
* if_addrhead is the list of all addresses associated to
|
||||
@ -190,12 +191,14 @@ struct ifnet {
|
||||
/* protected by if_addr_mtx */
|
||||
void *if_pf_kif;
|
||||
void *if_lagg; /* lagg glue */
|
||||
u_char if_alloctype; /* if_type at time of allocation */
|
||||
|
||||
/*
|
||||
* Spare fields are added so that we can modify sensitive data
|
||||
* structures without changing the kernel binary interface, and must
|
||||
* be used with care where binary compatibility is required.
|
||||
*/
|
||||
char if_cspare[3];
|
||||
void *if_pspare[8];
|
||||
int if_ispare[4];
|
||||
};
|
||||
@ -721,7 +724,13 @@ struct ifindex_entry {
|
||||
struct cdev *ife_dev;
|
||||
};
|
||||
|
||||
/*
|
||||
* Look up an ifnet given its index; the _ref variant also acquires a
|
||||
* reference that must be freed using if_rele(). It is almost always a bug
|
||||
* to call ifnet_byindex() instead if ifnet_byindex_ref().
|
||||
*/
|
||||
struct ifnet *ifnet_byindex(u_short idx);
|
||||
struct ifnet *ifnet_byindex_ref(u_short idx);
|
||||
|
||||
/*
|
||||
* Given the index, ifaddr_byindex() returns the one and only
|
||||
@ -758,6 +767,8 @@ void if_initname(struct ifnet *, const char *, int);
|
||||
void if_link_state_change(struct ifnet *, int);
|
||||
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
|
||||
void if_qflush(struct ifnet *);
|
||||
void if_ref(struct ifnet *);
|
||||
void if_rele(struct ifnet *);
|
||||
int if_setlladdr(struct ifnet *, const u_char *, int);
|
||||
void if_up(struct ifnet *);
|
||||
/*void ifinit(void);*/ /* declared in systm.h for main() */
|
||||
|
Loading…
Reference in New Issue
Block a user