Fix a race during null node creation between relookuping the hash and

adding vnode to hash. The fix is to use atomic hash-lookup-and-add-if-
not-found operation. The odd thing is that this race can't happen
actually because the lowervp vnode is locked exclusively now during the
whole process of null node creation. This must be thought as a step
toward shared lookups.

Also remove vp->v_mount checks when looking for a match in the hash,
as this is the vestige.

Also add comments and cosmetic changes.
This commit is contained in:
semenu 2002-06-13 21:49:09 +00:00
parent e930b35ecd
commit 50d99cdfec
4 changed files with 102 additions and 105 deletions

View File

@ -59,7 +59,7 @@ struct null_node {
int nullfs_init(struct vfsconf *vfsp);
int nullfs_uninit(struct vfsconf *vfsp);
int null_node_create(struct mount *mp, struct vnode *target, struct vnode **vpp);
int null_nodeget(struct mount *mp, struct vnode *target, struct vnode **vpp);
void null_hashrem(struct null_node *xp);
int null_bypass(struct vop_generic_args *ap);

View File

@ -71,10 +71,8 @@ struct mtx null_hashmtx;
static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
static int null_node_alloc(struct mount *mp, struct vnode *lowervp,
struct vnode **vpp);
static struct vnode *
null_node_find(struct mount *mp, struct vnode *lowervp);
static struct vnode * null_hashget(struct vnode *);
static struct vnode * null_hashins(struct null_node *);
/*
* Initialise cache headers
@ -105,8 +103,7 @@ nullfs_uninit(vfsp)
* Lower vnode should be locked on entry and will be left locked on exit.
*/
static struct vnode *
null_node_find(mp, lowervp)
struct mount *mp;
null_hashget(lowervp)
struct vnode *lowervp;
{
struct thread *td = curthread; /* XXX */
@ -124,7 +121,7 @@ null_node_find(mp, lowervp)
loop:
mtx_lock(&null_hashmtx);
LIST_FOREACH(a, hd, null_hash) {
if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
if (a->null_lowervp == lowervp) {
vp = NULLTOV(a);
mtx_lock(&vp->v_interlock);
mtx_unlock(&null_hashmtx);
@ -140,28 +137,75 @@ loop:
}
}
mtx_unlock(&null_hashmtx);
return NULLVP;
return (NULLVP);
}
/*
* Act like null_hashget, but add passed null_node to hash if no existing
* node found.
*/
static struct vnode *
null_hashins(xp)
struct null_node *xp;
{
struct thread *td = curthread; /* XXX */
struct null_node_hashhead *hd;
struct null_node *oxp;
struct vnode *ovp;
hd = NULL_NHASH(xp->null_lowervp);
loop:
mtx_lock(&null_hashmtx);
LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp) {
ovp = NULLTOV(oxp);
mtx_lock(&ovp->v_interlock);
mtx_unlock(&null_hashmtx);
if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
goto loop;
return (ovp);
}
}
LIST_INSERT_HEAD(hd, xp, null_hash);
mtx_unlock(&null_hashmtx);
return (NULLVP);
}
/*
* Make a new null_node node.
* Vp is the alias vnode, lofsvp is the lower vnode.
* Maintain a reference to (lowervp).
* Make a new or get existing nullfs node.
* Vp is the alias vnode, lowervp is the lower vnode.
*
* The lowervp assumed to be locked and having "spare" reference. This routine
* vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers"
* the caller's "spare" reference to created nullfs vnode.
*/
static int
null_node_alloc(mp, lowervp, vpp)
int
null_nodeget(mp, lowervp, vpp)
struct mount *mp;
struct vnode *lowervp;
struct vnode **vpp;
{
struct thread *td = curthread; /* XXX */
struct null_node_hashhead *hd;
struct null_node *xp;
struct vnode *othervp, *vp;
struct vnode *vp;
int error;
/* Lookup the hash firstly */
*vpp = null_hashget(lowervp);
if (*vpp != NULL) {
vrele(lowervp);
return (0);
}
/*
* We do not serialize vnode creation, instead we will check for
* duplicates later, when adding new vnode to hash.
*
* Note that duplicate can only appear in hash if the lowervp is
* locked LK_SHARED.
*/
/*
* Do the MALLOC before the getnewvnode since doing so afterward
* might cause a bogus v_data pointer to get dereferenced
@ -170,12 +214,11 @@ null_node_alloc(mp, lowervp, vpp)
MALLOC(xp, struct null_node *, sizeof(struct null_node),
M_NULLFSNODE, M_WAITOK);
error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp);
error = getnewvnode(VT_NULL, mp, null_vnodeop_p, &vp);
if (error) {
FREE(xp, M_NULLFSNODE);
return (error);
}
vp = *vpp;
xp->null_vnode = vp;
xp->null_lowervp = lowervp;
@ -186,19 +229,6 @@ null_node_alloc(mp, lowervp, vpp)
/* Though v_lock is inited by getnewvnode(), we want our own wmesg */
lockinit(&vp->v_lock, PVFS, "nunode", VLKTIMEOUT, LK_NOPAUSE);
/*
* Before we insert our new node onto the hash chains,
* check to see if someone else has beaten us to it.
* (We could have slept in MALLOC.)
*/
othervp = null_node_find(mp, lowervp);
if (othervp) {
xp->null_lowervp = NULL;
vrele(vp);
*vpp = othervp;
return 0;
};
/*
* From NetBSD:
* Now lock the new node. We rely on the fact that we were passed
@ -211,78 +241,38 @@ null_node_alloc(mp, lowervp, vpp)
vp->v_vnlock = lowervp->v_vnlock;
error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td);
if (error)
panic("null_node_alloc: can't lock new vnode\n");
VREF(lowervp);
hd = NULL_NHASH(lowervp);
mtx_lock(&null_hashmtx);
LIST_INSERT_HEAD(hd, xp, null_hash);
mtx_unlock(&null_hashmtx);
return 0;
}
panic("null_nodeget: can't lock new vnode\n");
/*
* Try to find an existing null_node vnode refering to the given underlying
* vnode (which should be locked). If no vnode found, create a new null_node
* vnode which contains a reference to the lower vnode.
*/
int
null_node_create(mp, lowervp, newvpp)
struct mount *mp;
struct vnode *lowervp;
struct vnode **newvpp;
{
struct vnode *aliasvp;
aliasvp = null_node_find(mp, lowervp);
if (aliasvp) {
/*
* null_node_find has taken another reference
* to the alias vnode.
* Atomically insert our new node into the hash or vget existing
* if someone else has beaten us to it.
*/
*vpp = null_hashins(xp);
if (*vpp != NULL) {
vrele(lowervp);
#ifdef NULLFS_DEBUG
vprint("null_node_create: exists", aliasvp);
#endif
} else {
int error;
/*
* Get new vnode.
*/
NULLFSDEBUG("null_node_create: create new alias vnode\n");
/*
* Make new vnode reference the null_node.
*/
error = null_node_alloc(mp, lowervp, &aliasvp);
if (error)
return error;
/*
* aliasvp is already VREF'd by getnewvnode()
*/
}
#ifdef DIAGNOSTIC
if (lowervp->v_usecount < 1) {
/* Should never happen... */
vprint ("null_node_create: alias ", aliasvp);
vprint ("null_node_create: lower ", lowervp);
panic ("null_node_create: lower has 0 usecount.");
};
#endif
#ifdef NULLFS_DEBUG
vprint("null_node_create: alias", aliasvp);
vprint("null_node_create: lower", lowervp);
#endif
*newvpp = aliasvp;
VOP_UNLOCK(vp, LK_THISLAYER, td);
vp->v_vnlock = NULL;
xp->null_lowervp = NULL;
vrele(vp);
return (0);
}
/*
* XXX We take extra vref just to workaround UFS's XXX:
* UFS can vrele() vnode in VOP_CLOSE() in some cases. Luckily, this
* can only happen if v_usecount == 1. To workaround, we just don't
* let v_usecount be 1, it will be 2 or more.
*/
VREF(lowervp);
*vpp = vp;
return (0);
}
/*
* Remove node from hash.
*/
void
null_hashrem(xp)
struct null_node *xp;

View File

@ -169,7 +169,7 @@ nullfs_mount(mp, ndp, td)
* Save reference. Each mount also holds
* a reference on the root vnode.
*/
error = null_node_create(mp, lowerrootvp, &vp);
error = null_nodeget(mp, lowerrootvp, &vp);
/*
* Unlock the node (either the lower or the alias)
*/
@ -354,7 +354,7 @@ nullfs_vget(mp, ino, flags, vpp)
if (error)
return (error);
return (null_node_create(mp, *vpp, vpp));
return (null_nodeget(mp, *vpp, vpp));
}
static int
@ -368,7 +368,7 @@ nullfs_fhtovp(mp, fidp, vpp)
if (error)
return (error);
return (null_node_create(mp, *vpp, vpp));
return (null_nodeget(mp, *vpp, vpp));
}
static int

View File

@ -346,7 +346,7 @@ null_bypass(ap)
vppp = VOPARG_OFFSETTO(struct vnode***,
descp->vdesc_vpp_offset,ap);
if (*vppp)
error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
error = null_nodeget(old_vps[0]->v_mount, **vppp, *vppp);
}
out:
@ -400,8 +400,11 @@ null_lookup(ap)
VREF(dvp);
vrele(lvp);
} else {
error = null_node_create(dvp->v_mount, lvp, &vp);
if (error == 0)
error = null_nodeget(dvp->v_mount, lvp, &vp);
if (error) {
/* XXX Cleanup needed... */
panic("null_nodeget failed");
}
*ap->a_vpp = vp;
}
}
@ -706,6 +709,11 @@ null_islocked(ap)
* There is no way to tell that someone issued remove/rmdir operation
* on the underlying filesystem. For now we just have to release lowevrp
* as soon as possible.
*
* Note, we can't release any resources nor remove vnode from hash before
* appropriate VXLOCK stuff is is done because other process can find this
* vnode in hash during inactivation and may be sitting in vget() and waiting
* for null_inactive to unlock vnode. Thus we will do all those in VOP_RECLAIM.
*/
static int
null_inactive(ap)
@ -729,8 +737,7 @@ null_inactive(ap)
}
/*
* We can free memory in null_inactive, but we do this
* here. (Possible to guard vp->v_data to point somewhere)
* Now, the VXLOCK is in force and we're free to destroy the null vnode.
*/
static int
null_reclaim(ap)