freebsd-skq/sys/kern/vfs_hash.c
Chuck Silvers a0a36d4886 vfs: avoid exposing partially constructed vnodes
If multiple threads race calling vfs_hash_insert() while creating vnodes
with the same identity, all of the vnodes which lose the race must be
destroyed before any other thread can see them. Previously this was
accomplished by the vput() in vfs_hash_insert() resulting in the vnode's
VOP_INACTIVE() method calling vgone() before the vnode lock was unlocked,
but at some point changes to the the vnode refcount/inactive logic have caused
that to no longer work, leading to crashes, so instead vfs_hash_insert()
must call vgone() itself before calling vput() on vnodes which lose the race.

Reviewed by:	mjg, kib
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D26291
2020-09-05 00:26:03 +00:00

237 lines
6.0 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2005 Poul-Henning Kamp
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/rwlock.h>
#include <sys/vnode.h>
static MALLOC_DEFINE(M_VFS_HASH, "vfs_hash", "VFS hash table");
static LIST_HEAD(vfs_hash_head, vnode) *vfs_hash_tbl;
static LIST_HEAD(,vnode) vfs_hash_side;
static u_long vfs_hash_mask;
static struct rwlock __exclusive_cache_line vfs_hash_lock;
static void
vfs_hashinit(void *dummy __unused)
{
vfs_hash_tbl = hashinit(desiredvnodes, M_VFS_HASH, &vfs_hash_mask);
rw_init(&vfs_hash_lock, "vfs hash");
LIST_INIT(&vfs_hash_side);
}
/* Must be SI_ORDER_SECOND so desiredvnodes is available */
SYSINIT(vfs_hash, SI_SUB_VFS, SI_ORDER_SECOND, vfs_hashinit, NULL);
u_int
vfs_hash_index(struct vnode *vp)
{
return (vp->v_hash + vp->v_mount->mnt_hashseed);
}
static struct vfs_hash_head *
vfs_hash_bucket(const struct mount *mp, u_int hash)
{
return (&vfs_hash_tbl[(hash + mp->mnt_hashseed) & vfs_hash_mask]);
}
int
vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp;
enum vgetstate vs;
int error;
while (1) {
rw_rlock(&vfs_hash_lock);
LIST_FOREACH(vp, vfs_hash_bucket(mp, hash), v_hashlist) {
if (vp->v_hash != hash)
continue;
if (vp->v_mount != mp)
continue;
if (fn != NULL && fn(vp, arg))
continue;
vs = vget_prep(vp);
rw_runlock(&vfs_hash_lock);
error = vget_finish(vp, flags, vs);
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
if (error)
return (error);
*vpp = vp;
return (0);
}
if (vp == NULL) {
rw_runlock(&vfs_hash_lock);
*vpp = NULL;
return (0);
}
}
}
void
vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp;
while (1) {
rw_rlock(&vfs_hash_lock);
LIST_FOREACH(vp, vfs_hash_bucket(mp, hash), v_hashlist) {
if (vp->v_hash != hash)
continue;
if (vp->v_mount != mp)
continue;
if (fn != NULL && fn(vp, arg))
continue;
vhold(vp);
rw_runlock(&vfs_hash_lock);
vref(vp);
vdrop(vp);
*vpp = vp;
return;
}
if (vp == NULL) {
rw_runlock(&vfs_hash_lock);
*vpp = NULL;
return;
}
}
}
void
vfs_hash_remove(struct vnode *vp)
{
rw_wlock(&vfs_hash_lock);
LIST_REMOVE(vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
}
int
vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
{
struct vnode *vp2;
enum vgetstate vs;
int error;
*vpp = NULL;
while (1) {
rw_wlock(&vfs_hash_lock);
LIST_FOREACH(vp2,
vfs_hash_bucket(vp->v_mount, hash), v_hashlist) {
if (vp2->v_hash != hash)
continue;
if (vp2->v_mount != vp->v_mount)
continue;
if (fn != NULL && fn(vp2, arg))
continue;
vs = vget_prep(vp2);
rw_wunlock(&vfs_hash_lock);
error = vget_finish(vp2, flags, vs);
if (error == ENOENT && (flags & LK_NOWAIT) == 0)
break;
rw_wlock(&vfs_hash_lock);
LIST_INSERT_HEAD(&vfs_hash_side, vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
vgone(vp);
vput(vp);
if (!error)
*vpp = vp2;
return (error);
}
if (vp2 == NULL)
break;
}
vp->v_hash = hash;
LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist);
rw_wunlock(&vfs_hash_lock);
return (0);
}
void
vfs_hash_rehash(struct vnode *vp, u_int hash)
{
rw_wlock(&vfs_hash_lock);
LIST_REMOVE(vp, v_hashlist);
LIST_INSERT_HEAD(vfs_hash_bucket(vp->v_mount, hash), vp, v_hashlist);
vp->v_hash = hash;
rw_wunlock(&vfs_hash_lock);
}
void
vfs_hash_changesize(u_long newmaxvnodes)
{
struct vfs_hash_head *vfs_hash_newtbl, *vfs_hash_oldtbl;
u_long vfs_hash_newmask, vfs_hash_oldmask;
struct vnode *vp;
int i;
vfs_hash_newtbl = hashinit(newmaxvnodes, M_VFS_HASH,
&vfs_hash_newmask);
/* If same hash table size, nothing to do */
if (vfs_hash_mask == vfs_hash_newmask) {
free(vfs_hash_newtbl, M_VFS_HASH);
return;
}
/*
* Move everything from the old hash table to the new table.
* None of the vnodes in the table can be recycled because to
* do so, they have to be removed from the hash table.
*/
rw_wlock(&vfs_hash_lock);
vfs_hash_oldtbl = vfs_hash_tbl;
vfs_hash_oldmask = vfs_hash_mask;
vfs_hash_tbl = vfs_hash_newtbl;
vfs_hash_mask = vfs_hash_newmask;
for (i = 0; i <= vfs_hash_oldmask; i++) {
while ((vp = LIST_FIRST(&vfs_hash_oldtbl[i])) != NULL) {
LIST_REMOVE(vp, v_hashlist);
LIST_INSERT_HEAD(
vfs_hash_bucket(vp->v_mount, vp->v_hash),
vp, v_hashlist);
}
}
rw_wunlock(&vfs_hash_lock);
free(vfs_hash_oldtbl, M_VFS_HASH);
}