Reimplement and/or implement vnet list locking by replacing a mostly

unused custom mutex/condvar-based sleep locks with two locks: an
rwlock (for non-sleeping use) and sxlock (for sleeping use).  Either
acquired for read is sufficient to stabilize the vnet list, but both
must be acquired for write to modify the list.

Replace previous no-op read locking macros, used in various places
in the stack, with actual locking to prevent race conditions.  Callers
must declare when they may perform unbounded sleeps or not when
selecting how to lock.

Refactor vnet sysinits so that the vnet list and locks are initialized
before kernel modules are linked, as the kernel linker will use them
for modules loaded by the boot loader.

Update various consumers of these KPIs based on whether they may sleep
or not.

Reviewed by:	bz
Approved by:	re (kib)
This commit is contained in:
Robert Watson 2009-07-19 14:20:53 +00:00
parent 519f677aff
commit 5ee847d3ac
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=195760
14 changed files with 98 additions and 65 deletions

View File

@ -58,23 +58,22 @@ static void vnet_mod_complete_registration(struct vnet_modlink *);
static int vnet_mod_constructor(struct vnet_modlink *);
static int vnet_mod_destructor(struct vnet_modlink *);
#define VNET_LIST_WLOCK() \
mtx_lock(&vnet_list_refc_mtx); \
while (vnet_list_refc != 0) \
cv_wait(&vnet_list_condvar, &vnet_list_refc_mtx);
struct rwlock vnet_rwlock;
struct sx vnet_sxlock;
#define VNET_LIST_WUNLOCK() \
mtx_unlock(&vnet_list_refc_mtx);
#define VNET_LIST_WLOCK() do { \
sx_xlock(&vnet_sxlock); \
rw_wlock(&vnet_rwlock); \
} while (0)
#define VNET_LIST_WUNLOCK() do { \
rw_wunlock(&vnet_rwlock); \
sx_xunlock(&vnet_sxlock); \
} while (0)
struct vnet_list_head vnet_head;
struct cv vnet_list_condvar;
struct mtx vnet_list_refc_mtx;
int vnet_list_refc = 0;
struct vnet *vnet0;
/*
* Move an ifnet to or from another vnet, specified by the jail id.
*/
@ -373,17 +372,23 @@ vnet_foreach(void (*vnet_foreach_fn)(struct vnet *, void *), void *arg)
}
static void
vi_init(void *unused)
vnet_init_prelink(void *arg)
{
rw_init(&vnet_rwlock, "vnet_rwlock");
sx_init(&vnet_sxlock, "vnet_sxlock");
LIST_INIT(&vnet_head);
}
SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST,
vnet_init_prelink, NULL);
static void
vnet0_init(void *arg)
{
TAILQ_INIT(&vnet_modlink_head);
TAILQ_INIT(&vnet_modpending_head);
LIST_INIT(&vnet_head);
mtx_init(&vnet_list_refc_mtx, "vnet_list_refc_mtx", NULL, MTX_DEF);
cv_init(&vnet_list_condvar, "vnet_list_condvar");
/*
* We MUST clear curvnet in vi_init_done() before going SMP,
* otherwise CURVNET_SET() macros would scream about unnecessary
@ -391,9 +396,10 @@ vi_init(void *unused)
*/
curvnet = prison0.pr_vnet = vnet0 = vnet_alloc();
}
SYSINIT(vnet0_init, SI_SUB_VNET, SI_ORDER_FIRST, vnet0_init, NULL);
static void
vi_init_done(void *unused)
vnet_init_done(void *unused)
{
struct vnet_modlink *vml_iter;
@ -411,8 +417,8 @@ vi_init_done(void *unused)
panic("going nowhere without my vnet modules!");
}
SYSINIT(vimage, SI_SUB_VIMAGE, SI_ORDER_FIRST, vi_init, NULL);
SYSINIT(vimage_done, SI_SUB_VIMAGE_DONE, SI_ORDER_FIRST, vi_init_done, NULL);
SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done,
NULL);
#ifdef DDB
DB_SHOW_COMMAND(vnets, db_show_vnets)

View File

@ -1793,8 +1793,8 @@ if_slowtimo(void *arg)
struct ifnet *ifp;
int s = splimp();
VNET_LIST_RLOCK_NOSLEEP();
IFNET_RLOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
@ -1805,8 +1805,8 @@ if_slowtimo(void *arg)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IFNET_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
splx(s);
timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
}

View File

@ -561,8 +561,8 @@ ng_gif_mod_event(module_t mod, int event, void *data)
ng_gif_input_orphan_p = ng_gif_input_orphan;
/* Create nodes for any already-existing gif interfaces */
VNET_LIST_RLOCK_NOSLEEP();
IFNET_RLOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET_QUIET(vnet_iter); /* XXX revisit quiet */
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
@ -571,8 +571,8 @@ ng_gif_mod_event(module_t mod, int event, void *data)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IFNET_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
break;
case MOD_UNLOAD:

View File

@ -1616,13 +1616,13 @@ igmp_fasttimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
igmp_fasttimo_vnet();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@ -2159,13 +2159,13 @@ igmp_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
igmp_slowtimo_vnet();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*

View File

@ -1570,7 +1570,7 @@ ipport_tick(void *xtp)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */
if (V_ipport_tcpallocs <=
@ -1582,7 +1582,7 @@ ipport_tick(void *xtp)
V_ipport_tcplastcount = V_ipport_tcpallocs;
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
}

View File

@ -319,7 +319,7 @@ in_rtqdrain(void)
struct rtqk_arg arg;
int fibnum;
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
@ -336,7 +336,7 @@ in_rtqdrain(void)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
static int _in_rt_was_here;

View File

@ -1193,8 +1193,8 @@ ip_slowtimo(void)
struct ipq *fp;
int i;
VNET_LIST_RLOCK_NOSLEEP();
IPQ_LOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
@ -1228,8 +1228,8 @@ ip_slowtimo(void)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IPQ_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@ -1241,8 +1241,8 @@ ip_drain(void)
VNET_ITERATOR_DECL(vnet_iter);
int i;
VNET_LIST_RLOCK_NOSLEEP();
IPQ_LOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
for (i = 0; i < IPREASS_NHASH; i++) {
@ -1254,8 +1254,8 @@ ip_drain(void)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IPQ_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
in_rtqdrain();
}

View File

@ -940,7 +940,7 @@ tcp_drain(void)
if (!do_tcpdrain)
return;
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
struct inpcb *inpb;
@ -976,7 +976,7 @@ tcp_drain(void)
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@ -1576,7 +1576,7 @@ tcp_isn_tick(void *xtp)
VNET_ITERATOR_DECL(vnet_iter);
u_int32_t projected_offset;
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
ISN_LOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS */
@ -1590,7 +1590,7 @@ tcp_isn_tick(void *xtp)
CURVNET_RESTORE();
}
ISN_UNLOCK();
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
callout_reset(&isn_callout, hz/100, tcp_isn_tick, NULL);
}

View File

@ -127,7 +127,7 @@ tcp_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
@ -136,7 +136,7 @@ tcp_slowtimo(void)
INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =

View File

@ -720,8 +720,8 @@ frag6_slowtimo(void)
VNET_ITERATOR_DECL(vnet_iter);
struct ip6q *q6;
VNET_LIST_RLOCK_NOSLEEP();
IP6Q_LOCK();
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
q6 = V_ip6q.ip6q_next;
@ -748,8 +748,8 @@ frag6_slowtimo(void)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@ -760,9 +760,11 @@ frag6_drain(void)
{
VNET_ITERATOR_DECL(vnet_iter);
if (IP6Q_TRYLOCK() == 0)
VNET_LIST_RLOCK_NOSLEEP();
if (IP6Q_TRYLOCK() == 0) {
VNET_LIST_RUNLOCK_NOSLEEP();
return;
VNET_LIST_RLOCK();
}
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
while (V_ip6q.ip6q_next != &V_ip6q) {
@ -772,6 +774,6 @@ frag6_drain(void)
}
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
IP6Q_UNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}

View File

@ -1306,13 +1306,13 @@ mld_fasttimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
mld_fasttimo_vnet();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*
@ -1721,13 +1721,13 @@ mld_slowtimo(void)
{
VNET_ITERATOR_DECL(vnet_iter);
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
mld_slowtimo_vnet();
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
}
/*

View File

@ -4537,7 +4537,7 @@ key_timehandler(void)
VNET_ITERATOR_DECL(vnet_iter);
time_t now = time_second;
VNET_LIST_RLOCK();
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
key_flush_spd(now);
@ -4546,7 +4546,7 @@ key_timehandler(void)
key_flush_spacq(now);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
VNET_LIST_RUNLOCK_NOSLEEP();
#ifndef IPSEC_DEBUG2
/* do exchange to tick time !! */

View File

@ -106,13 +106,14 @@ enum sysinit_sub_id {
SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */
SI_SUB_LOCK = 0x1B00000, /* various locks */
SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */
SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */
SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */
SI_SUB_VIMAGE = 0x21E0000, /* vimage infrastructure */
SI_SUB_VNET = 0x21E0000, /* vnet 0 */
SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/
SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/
SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */
@ -158,7 +159,7 @@ enum sysinit_sub_id {
SI_SUB_SWAP = 0xc000000, /* swap */
SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/
SI_SUB_SYSCALLS = 0xd800000, /* register system calls */
SI_SUB_VIMAGE_DONE = 0xdc00000, /* vnet registration complete */
SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */
SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/
SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/
SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/

View File

@ -38,6 +38,9 @@
#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/sx.h>
#ifdef INVARIANTS
#define VNET_DEBUG
#endif
@ -176,17 +179,42 @@ struct vnet {
#endif /* !VIMAGE */
#ifdef VIMAGE
/*
* Global linked list of all virtual network stacks, along with read locks to
* access it. If a caller may sleep while accessing the list, it must use
* the sleepable lock macros.
*/
LIST_HEAD(vnet_list_head, vnet);
extern struct vnet_list_head vnet_head;
extern struct vnet *vnet0;
#define VNET_ITERATOR_DECL(arg) struct vnet *arg;
#define VNET_FOREACH(arg) LIST_FOREACH(arg, &vnet_head, vnet_le)
#else
extern struct rwlock vnet_rwlock;
extern struct sx vnet_sxlock;
#define VNET_LIST_RLOCK() sx_slock(&vnet_sxlock)
#define VNET_LIST_RLOCK_NOSLEEP() rw_rlock(&vnet_rwlock)
#define VNET_LIST_RUNLOCK() sx_sunlock(&vnet_sxlock)
#define VNET_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vnet_rwlock)
/*
* Iteration macros to walk the global list of virtual network stacks.
*/
#define VNET_ITERATOR_DECL(arg) struct vnet *arg
#define VNET_FOREACH(arg) LIST_FOREACH((arg), &vnet_head, vnet_le)
#else /* !VIMAGE */
/*
* No-op macros for the !VIMAGE case.
*/
#define VNET_LIST_RLOCK()
#define VNET_LIST_RLOCK_NOSLEEP()
#define VNET_LIST_RUNLOCK()
#define VNET_LIST_RUNLOCK_NOSLEEP()
#define VNET_ITERATOR_DECL(arg)
#define VNET_FOREACH(arg)
#endif
#endif /* VIMAGE */
#ifdef VIMAGE
extern struct vnet *vnet0;
#define IS_DEFAULT_VNET(arg) ((arg) == vnet0)
#else
#define IS_DEFAULT_VNET(arg) 1
@ -202,10 +230,6 @@ extern struct vnet *vnet0;
#define P_TO_VNET(p) NULL
#endif /* VIMAGE */
/* Non-VIMAGE null-macros */
#define VNET_LIST_RLOCK()
#define VNET_LIST_RUNLOCK()
#endif /* _KERNEL */
#endif /* !_SYS_VIMAGE_H_ */