Remove Giant acquisition from the mount and unmount pathes.
It could be claimed that two things were reasonable protected by Giant. One is vfsconf list links, which is converted to the new dedicated sx vfsconf_sx. Another is vfsconf.vfc_refcount, which is now updated with atomics. Note that vfc_refcount still has the same races now as it has under the Giant, the unload of filesystem modules can happen while the module is still in use. Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks
This commit is contained in:
parent
bbb1622644
commit
17b754a026
@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/linker.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/syscallsubr.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vnode.h>
|
||||
@ -64,6 +65,8 @@ int maxvfsconf = VFS_GENERIC + 1;
|
||||
* New entries are added/deleted by vfs_register()/vfs_unregister()
|
||||
*/
|
||||
struct vfsconfhead vfsconf = TAILQ_HEAD_INITIALIZER(vfsconf);
|
||||
struct sx vfsconf_sx;
|
||||
SX_SYSINIT(vfsconf, &vfsconf_sx, "vfsconf");
|
||||
|
||||
/*
|
||||
* Loader.conf variable vfs.typenumhash enables setting vfc_typenum using a hash
|
||||
@ -104,17 +107,30 @@ struct vattr va_null;
|
||||
* Routines having to do with the management of the vnode table.
|
||||
*/
|
||||
|
||||
static struct vfsconf *
|
||||
vfs_byname_locked(const char *name)
|
||||
{
|
||||
struct vfsconf *vfsp;
|
||||
|
||||
sx_assert(&vfsconf_sx, SA_LOCKED);
|
||||
if (!strcmp(name, "ffs"))
|
||||
name = "ufs";
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
|
||||
if (!strcmp(name, vfsp->vfc_name))
|
||||
return (vfsp);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
struct vfsconf *
|
||||
vfs_byname(const char *name)
|
||||
{
|
||||
struct vfsconf *vfsp;
|
||||
|
||||
if (!strcmp(name, "ffs"))
|
||||
name = "ufs";
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list)
|
||||
if (!strcmp(name, vfsp->vfc_name))
|
||||
return (vfsp);
|
||||
return (NULL);
|
||||
vfsconf_slock();
|
||||
vfsp = vfs_byname_locked(name);
|
||||
vfsconf_sunlock();
|
||||
return (vfsp);
|
||||
}
|
||||
|
||||
struct vfsconf *
|
||||
@ -168,8 +184,11 @@ vfs_register(struct vfsconf *vfc)
|
||||
vfc->vfc_name, vfc->vfc_version);
|
||||
return (EINVAL);
|
||||
}
|
||||
if (vfs_byname(vfc->vfc_name) != NULL)
|
||||
vfsconf_lock();
|
||||
if (vfs_byname_locked(vfc->vfc_name) != NULL) {
|
||||
vfsconf_unlock();
|
||||
return (EEXIST);
|
||||
}
|
||||
|
||||
if (vfs_typenumhash != 0) {
|
||||
/*
|
||||
@ -201,26 +220,6 @@ vfs_register(struct vfsconf *vfc)
|
||||
vfc->vfc_typenum = maxvfsconf++;
|
||||
TAILQ_INSERT_TAIL(&vfsconf, vfc, vfc_list);
|
||||
|
||||
/*
|
||||
* If this filesystem has a sysctl node under vfs
|
||||
* (i.e. vfs.xxfs), then change the oid number of that node to
|
||||
* match the filesystem's type number. This allows user code
|
||||
* which uses the type number to read sysctl variables defined
|
||||
* by the filesystem to continue working. Since the oids are
|
||||
* in a sorted list, we need to make sure the order is
|
||||
* preserved by re-registering the oid after modifying its
|
||||
* number.
|
||||
*/
|
||||
sysctl_lock();
|
||||
SLIST_FOREACH(oidp, SYSCTL_CHILDREN(&sysctl___vfs), oid_link)
|
||||
if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
|
||||
sysctl_unregister_oid(oidp);
|
||||
oidp->oid_number = vfc->vfc_typenum;
|
||||
sysctl_register_oid(oidp);
|
||||
break;
|
||||
}
|
||||
sysctl_unlock();
|
||||
|
||||
/*
|
||||
* Initialise unused ``struct vfsops'' fields, to use
|
||||
* the vfs_std*() functions. Note, we need the mount
|
||||
@ -280,8 +279,30 @@ vfs_register(struct vfsconf *vfc)
|
||||
* Call init function for this VFS...
|
||||
*/
|
||||
(*(vfc->vfc_vfsops->vfs_init))(vfc);
|
||||
vfsconf_unlock();
|
||||
|
||||
return 0;
|
||||
/*
|
||||
* If this filesystem has a sysctl node under vfs
|
||||
* (i.e. vfs.xxfs), then change the oid number of that node to
|
||||
* match the filesystem's type number. This allows user code
|
||||
* which uses the type number to read sysctl variables defined
|
||||
* by the filesystem to continue working. Since the oids are
|
||||
* in a sorted list, we need to make sure the order is
|
||||
* preserved by re-registering the oid after modifying its
|
||||
* number.
|
||||
*/
|
||||
sysctl_lock();
|
||||
SLIST_FOREACH(oidp, SYSCTL_CHILDREN(&sysctl___vfs), oid_link) {
|
||||
if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
|
||||
sysctl_unregister_oid(oidp);
|
||||
oidp->oid_number = vfc->vfc_typenum;
|
||||
sysctl_register_oid(oidp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
sysctl_unlock();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
@ -294,15 +315,22 @@ vfs_unregister(struct vfsconf *vfc)
|
||||
|
||||
i = vfc->vfc_typenum;
|
||||
|
||||
vfsp = vfs_byname(vfc->vfc_name);
|
||||
if (vfsp == NULL)
|
||||
return EINVAL;
|
||||
if (vfsp->vfc_refcount)
|
||||
return EBUSY;
|
||||
vfsconf_lock();
|
||||
vfsp = vfs_byname_locked(vfc->vfc_name);
|
||||
if (vfsp == NULL) {
|
||||
vfsconf_unlock();
|
||||
return (EINVAL);
|
||||
}
|
||||
if (vfsp->vfc_refcount != 0) {
|
||||
vfsconf_unlock();
|
||||
return (EBUSY);
|
||||
}
|
||||
if (vfc->vfc_vfsops->vfs_uninit != NULL) {
|
||||
error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp);
|
||||
if (error)
|
||||
if (error != 0) {
|
||||
vfsconf_unlock();
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
TAILQ_REMOVE(&vfsconf, vfsp, vfc_list);
|
||||
maxtypenum = VFS_GENERIC;
|
||||
@ -310,7 +338,8 @@ vfs_unregister(struct vfsconf *vfc)
|
||||
if (maxtypenum < vfsp->vfc_typenum)
|
||||
maxtypenum = vfsp->vfc_typenum;
|
||||
maxvfsconf = maxtypenum + 1;
|
||||
return 0;
|
||||
vfsconf_unlock();
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -463,9 +463,9 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
|
||||
mp->mnt_activevnodelistsize = 0;
|
||||
mp->mnt_ref = 0;
|
||||
(void) vfs_busy(mp, MBF_NOWAIT);
|
||||
atomic_add_acq_int(&vfsp->vfc_refcount, 1);
|
||||
mp->mnt_op = vfsp->vfc_vfsops;
|
||||
mp->mnt_vfc = vfsp;
|
||||
vfsp->vfc_refcount++; /* XXX Unlocked */
|
||||
mp->mnt_stat.f_type = vfsp->vfc_typenum;
|
||||
mp->mnt_gen++;
|
||||
strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
|
||||
@ -505,7 +505,7 @@ vfs_mount_destroy(struct mount *mp)
|
||||
panic("vfs_mount_destroy: nonzero writeopcount");
|
||||
if (mp->mnt_secondary_writes != 0)
|
||||
panic("vfs_mount_destroy: nonzero secondary_writes");
|
||||
mp->mnt_vfc->vfc_refcount--;
|
||||
atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
|
||||
if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
|
||||
struct vnode *vp;
|
||||
|
||||
@ -736,17 +736,12 @@ sys_mount(td, uap)
|
||||
}
|
||||
|
||||
AUDIT_ARG_TEXT(fstype);
|
||||
mtx_lock(&Giant);
|
||||
vfsp = vfs_byname_kld(fstype, td, &error);
|
||||
free(fstype, M_TEMP);
|
||||
if (vfsp == NULL) {
|
||||
mtx_unlock(&Giant);
|
||||
if (vfsp == NULL)
|
||||
return (ENOENT);
|
||||
}
|
||||
if (vfsp->vfc_vfsops->vfs_cmount == NULL) {
|
||||
mtx_unlock(&Giant);
|
||||
if (vfsp->vfc_vfsops->vfs_cmount == NULL)
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
|
||||
ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
|
||||
ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
|
||||
@ -755,7 +750,6 @@ sys_mount(td, uap)
|
||||
ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");
|
||||
|
||||
error = vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags);
|
||||
mtx_unlock(&Giant);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -777,7 +771,6 @@ vfs_domount_first(
|
||||
struct vnode *newdp;
|
||||
int error;
|
||||
|
||||
mtx_assert(&Giant, MA_OWNED);
|
||||
ASSERT_VOP_ELOCKED(vp, __func__);
|
||||
KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
|
||||
|
||||
@ -889,7 +882,6 @@ vfs_domount_update(
|
||||
int error, export_error;
|
||||
uint64_t flag;
|
||||
|
||||
mtx_assert(&Giant, MA_OWNED);
|
||||
ASSERT_VOP_ELOCKED(vp, __func__);
|
||||
KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
|
||||
|
||||
@ -1091,7 +1083,6 @@ vfs_domount(
|
||||
error = namei(&nd);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
mtx_lock(&Giant);
|
||||
NDFREE(&nd, NDF_ONLY_PNBUF);
|
||||
vp = nd.ni_vp;
|
||||
if ((fsflags & MNT_UPDATE) == 0) {
|
||||
@ -1106,7 +1097,6 @@ vfs_domount(
|
||||
free(pathbuf, M_TEMP);
|
||||
} else
|
||||
error = vfs_domount_update(td, vp, fsflags, optlist);
|
||||
mtx_unlock(&Giant);
|
||||
|
||||
ASSERT_VI_UNLOCKED(vp, __func__);
|
||||
ASSERT_VOP_UNLOCKED(vp, __func__);
|
||||
@ -1153,12 +1143,10 @@ sys_unmount(td, uap)
|
||||
free(pathbuf, M_TEMP);
|
||||
return (error);
|
||||
}
|
||||
mtx_lock(&Giant);
|
||||
if (uap->flags & MNT_BYFSID) {
|
||||
AUDIT_ARG_TEXT(pathbuf);
|
||||
/* Decode the filesystem ID. */
|
||||
if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
|
||||
mtx_unlock(&Giant);
|
||||
free(pathbuf, M_TEMP);
|
||||
return (EINVAL);
|
||||
}
|
||||
@ -1198,19 +1186,15 @@ sys_unmount(td, uap)
|
||||
* now, so in the !MNT_BYFSID case return the more likely
|
||||
* EINVAL for compatibility.
|
||||
*/
|
||||
mtx_unlock(&Giant);
|
||||
return ((uap->flags & MNT_BYFSID) ? ENOENT : EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't allow unmounting the root filesystem.
|
||||
*/
|
||||
if (mp->mnt_flag & MNT_ROOTFS) {
|
||||
mtx_unlock(&Giant);
|
||||
if (mp->mnt_flag & MNT_ROOTFS)
|
||||
return (EINVAL);
|
||||
}
|
||||
error = dounmount(mp, uap->flags, td);
|
||||
mtx_unlock(&Giant);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -1228,8 +1212,6 @@ dounmount(mp, flags, td)
|
||||
uint64_t async_flag;
|
||||
int mnt_gen_r;
|
||||
|
||||
mtx_assert(&Giant, MA_OWNED);
|
||||
|
||||
if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
|
||||
mnt_gen_r = mp->mnt_gen;
|
||||
VI_LOCK(coveredvp);
|
||||
|
@ -3233,6 +3233,7 @@ sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS)
|
||||
int error;
|
||||
|
||||
error = 0;
|
||||
vfsconf_slock();
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
if (req->flags & SCTL_MASK32)
|
||||
@ -3243,11 +3244,12 @@ sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS)
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
vfsconf_sunlock();
|
||||
return (error);
|
||||
}
|
||||
|
||||
SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD,
|
||||
NULL, 0, sysctl_vfs_conflist,
|
||||
SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD |
|
||||
CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist,
|
||||
"S,xvfsconf", "List of all configured filesystems");
|
||||
|
||||
#ifndef BURN_BRIDGES
|
||||
@ -3277,9 +3279,12 @@ vfs_sysctl(SYSCTL_HANDLER_ARGS)
|
||||
case VFS_CONF:
|
||||
if (namelen != 3)
|
||||
return (ENOTDIR); /* overloaded */
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list)
|
||||
vfsconf_slock();
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
|
||||
if (vfsp->vfc_typenum == name[2])
|
||||
break;
|
||||
}
|
||||
vfsconf_sunlock();
|
||||
if (vfsp == NULL)
|
||||
return (EOPNOTSUPP);
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
@ -3292,8 +3297,9 @@ vfs_sysctl(SYSCTL_HANDLER_ARGS)
|
||||
return (EOPNOTSUPP);
|
||||
}
|
||||
|
||||
static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP,
|
||||
vfs_sysctl, "Generic filesystem");
|
||||
static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP |
|
||||
CTLFLAG_MPSAFE, vfs_sysctl,
|
||||
"Generic filesystem");
|
||||
|
||||
#if 1 || defined(COMPAT_PRELITE2)
|
||||
|
||||
@ -3304,6 +3310,7 @@ sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
|
||||
struct vfsconf *vfsp;
|
||||
struct ovfsconf ovfs;
|
||||
|
||||
vfsconf_slock();
|
||||
TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
|
||||
bzero(&ovfs, sizeof(ovfs));
|
||||
ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */
|
||||
@ -3312,10 +3319,13 @@ sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
|
||||
ovfs.vfc_refcount = vfsp->vfc_refcount;
|
||||
ovfs.vfc_flags = vfsp->vfc_flags;
|
||||
error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
|
||||
if (error)
|
||||
return error;
|
||||
if (error != 0) {
|
||||
vfsconf_sunlock();
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
vfsconf_sunlock();
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* 1 || COMPAT_PRELITE2 */
|
||||
@ -3413,8 +3423,9 @@ sysctl_vnode(SYSCTL_HANDLER_ARGS)
|
||||
return (error);
|
||||
}
|
||||
|
||||
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
|
||||
0, 0, sysctl_vnode, "S,xvnode", "");
|
||||
SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD |
|
||||
CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode",
|
||||
"");
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/lock.h>
|
||||
#include <sys/lockmgr.h>
|
||||
#include <sys/_mutex.h>
|
||||
#include <sys/_sx.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -889,6 +890,11 @@ void vfs_unmountall(void);
|
||||
extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
|
||||
extern struct mtx mountlist_mtx;
|
||||
extern struct nfs_public nfs_pub;
|
||||
extern struct sx vfsconf_sx;
|
||||
#define vfsconf_lock() sx_xlock(&vfsconf_sx)
|
||||
#define vfsconf_unlock() sx_xunlock(&vfsconf_sx)
|
||||
#define vfsconf_slock() sx_slock(&vfsconf_sx)
|
||||
#define vfsconf_sunlock() sx_sunlock(&vfsconf_sx)
|
||||
|
||||
/*
|
||||
* Declarations for these vfs default operations are located in
|
||||
|
Loading…
Reference in New Issue
Block a user