Add a new "mntfs" pseudo file system which provides private device vnodes for
file systems to safely access their disk devices, and adapt FFS to use it. Also add a new BO_NOBUFS flag to allow enforcing that file systems using mntfs vnodes do not accidentally use the original devfs vnode to create buffers. Reviewed by: kib, mckusick Approved by: imp (mentor) Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D23787
This commit is contained in:
parent
6c37d6032e
commit
f15ccf8836
@ -3479,6 +3479,7 @@ fs/fuse/fuse_main.c optional fusefs
|
|||||||
fs/fuse/fuse_node.c optional fusefs
|
fs/fuse/fuse_node.c optional fusefs
|
||||||
fs/fuse/fuse_vfsops.c optional fusefs
|
fs/fuse/fuse_vfsops.c optional fusefs
|
||||||
fs/fuse/fuse_vnops.c optional fusefs
|
fs/fuse/fuse_vnops.c optional fusefs
|
||||||
|
fs/mntfs/mntfs_vnops.c standard
|
||||||
fs/msdosfs/msdosfs_conv.c optional msdosfs
|
fs/msdosfs/msdosfs_conv.c optional msdosfs
|
||||||
fs/msdosfs/msdosfs_denode.c optional msdosfs
|
fs/msdosfs/msdosfs_denode.c optional msdosfs
|
||||||
fs/msdosfs/msdosfs_fat.c optional msdosfs
|
fs/msdosfs/msdosfs_fat.c optional msdosfs
|
||||||
|
95
sys/fs/mntfs/mntfs_vnops.c
Normal file
95
sys/fs/mntfs/mntfs_vnops.c
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
/*-
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020 Netflix, Inc.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
||||||
|
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
|
||||||
|
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* $FreeBSD$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/param.h>
|
||||||
|
#include <sys/conf.h>
|
||||||
|
#include <sys/mount.h>
|
||||||
|
#include <sys/vnode.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The "mntfs" VCHR vnodes implemented here provide a safe way for file systems
|
||||||
|
* to access their disk devices. Using the normal devfs vnode has the problem
|
||||||
|
* that if the device disappears, the devfs vnode is vgone'd as part of
|
||||||
|
* removing it from the application-visible namespace, and some file systems
|
||||||
|
* (notably FFS with softdep) get very unhappy if their dirty buffers are
|
||||||
|
* invalidated out from under them. By using a separate, private vnode,
|
||||||
|
* file systems are able to clean up their buffer state in a controlled fashion
|
||||||
|
* when the underlying device disappears.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
mntfs_reclaim(struct vop_reclaim_args *ap)
|
||||||
|
{
|
||||||
|
struct vnode *vp = ap->a_vp;
|
||||||
|
|
||||||
|
dev_rel(vp->v_rdev);
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct vop_vector mntfs_vnodeops = {
|
||||||
|
.vop_default = &default_vnodeops,
|
||||||
|
|
||||||
|
.vop_fsync = vop_stdfsync,
|
||||||
|
.vop_strategy = VOP_PANIC,
|
||||||
|
.vop_reclaim = mntfs_reclaim,
|
||||||
|
};
|
||||||
|
VFS_VOP_VECTOR_REGISTER(mntfs_vnodeops);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a private VCHR vnode for use by a mounted fs.
|
||||||
|
* The underlying device will be the same as for the given vnode.
|
||||||
|
* This mntfs vnode must be freed with mntfs_freevp() rather than just
|
||||||
|
* releasing the reference.
|
||||||
|
*/
|
||||||
|
struct vnode *
|
||||||
|
mntfs_allocvp(struct mount *mp, struct vnode *ovp)
|
||||||
|
{
|
||||||
|
struct vnode *vp;
|
||||||
|
struct cdev *dev;
|
||||||
|
|
||||||
|
ASSERT_VOP_ELOCKED(ovp, __func__);
|
||||||
|
|
||||||
|
dev = ovp->v_rdev;
|
||||||
|
|
||||||
|
getnewvnode("mntfs", mp, &mntfs_vnodeops, &vp);
|
||||||
|
vp->v_type = VCHR;
|
||||||
|
vp->v_data = NULL;
|
||||||
|
dev_ref(dev);
|
||||||
|
vp->v_rdev = dev;
|
||||||
|
|
||||||
|
return (vp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Dispose of a vnode obtained from mntfs_allocvp().
 *
 * The vnode is first passed to vgone() and only then is the caller's
 * reference released with vrele().  NOTE(review): vgone() is expected to
 * run the reclaim operation (mntfs_reclaim), which drops the device
 * reference -- confirm against vgone(9).
 */
void
mntfs_freevp(struct vnode *vp)
{

	vgone(vp);	/* must precede dropping our reference */
	vrele(vp);
}
|
@ -2289,6 +2289,8 @@ buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
|
|||||||
int error;
|
int error;
|
||||||
|
|
||||||
ASSERT_BO_WLOCKED(bo);
|
ASSERT_BO_WLOCKED(bo);
|
||||||
|
KASSERT((bo->bo_flag & BO_NOBUFS) == 0,
|
||||||
|
("buf_vlist_add: bo %p does not allow bufs", bo));
|
||||||
KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0,
|
KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0,
|
||||||
("dead bo %p", bo));
|
("dead bo %p", bo));
|
||||||
KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
|
KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
|
||||||
|
@ -117,6 +117,7 @@ struct bufobj {
|
|||||||
#define BO_ONWORKLST (1 << 0) /* On syncer work-list */
|
#define BO_ONWORKLST (1 << 0) /* On syncer work-list */
|
||||||
#define BO_WWAIT (1 << 1) /* Wait for output to complete */
|
#define BO_WWAIT (1 << 1) /* Wait for output to complete */
|
||||||
#define BO_DEAD (1 << 2) /* Dead; only with INVARIANTS */
|
#define BO_DEAD (1 << 2) /* Dead; only with INVARIANTS */
|
||||||
|
#define BO_NOBUFS (1 << 3) /* No bufs allowed */
|
||||||
|
|
||||||
#define BO_LOCKPTR(bo) (&(bo)->bo_lock)
|
#define BO_LOCKPTR(bo) (&(bo)->bo_lock)
|
||||||
#define BO_LOCK(bo) rw_wlock(BO_LOCKPTR((bo)))
|
#define BO_LOCK(bo) rw_wlock(BO_LOCKPTR((bo)))
|
||||||
|
@ -940,6 +940,8 @@ extern struct sx vfsconf_sx;
|
|||||||
#define vfsconf_unlock() sx_xunlock(&vfsconf_sx)
|
#define vfsconf_unlock() sx_xunlock(&vfsconf_sx)
|
||||||
#define vfsconf_slock() sx_slock(&vfsconf_sx)
|
#define vfsconf_slock() sx_slock(&vfsconf_sx)
|
||||||
#define vfsconf_sunlock() sx_sunlock(&vfsconf_sx)
|
#define vfsconf_sunlock() sx_sunlock(&vfsconf_sx)
|
||||||
|
struct vnode *mntfs_allocvp(struct mount *, struct vnode *);
|
||||||
|
void mntfs_freevp(struct vnode *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Declarations for these vfs default operations are located in
|
* Declarations for these vfs default operations are located in
|
||||||
|
@ -3594,6 +3594,7 @@ buffered_write(fp, uio, active_cred, flags, td)
|
|||||||
struct inode *ip;
|
struct inode *ip;
|
||||||
struct buf *bp;
|
struct buf *bp;
|
||||||
struct fs *fs;
|
struct fs *fs;
|
||||||
|
struct ufsmount *ump;
|
||||||
struct filedesc *fdp;
|
struct filedesc *fdp;
|
||||||
int error;
|
int error;
|
||||||
daddr_t lbn;
|
daddr_t lbn;
|
||||||
@ -3622,10 +3623,12 @@ buffered_write(fp, uio, active_cred, flags, td)
|
|||||||
return (EINVAL);
|
return (EINVAL);
|
||||||
}
|
}
|
||||||
ip = VTOI(vp);
|
ip = VTOI(vp);
|
||||||
if (ITODEVVP(ip) != devvp) {
|
ump = ip->i_ump;
|
||||||
|
if (ump->um_odevvp != devvp) {
|
||||||
vput(vp);
|
vput(vp);
|
||||||
return (EINVAL);
|
return (EINVAL);
|
||||||
}
|
}
|
||||||
|
devvp = ump->um_devvp;
|
||||||
fs = ITOFS(ip);
|
fs = ITOFS(ip);
|
||||||
vput(vp);
|
vput(vp);
|
||||||
foffset_lock_uio(fp, uio, flags);
|
foffset_lock_uio(fp, uio, flags);
|
||||||
|
@ -151,7 +151,7 @@ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
|
|||||||
static int
|
static int
|
||||||
ffs_mount(struct mount *mp)
|
ffs_mount(struct mount *mp)
|
||||||
{
|
{
|
||||||
struct vnode *devvp;
|
struct vnode *devvp, *odevvp;
|
||||||
struct thread *td;
|
struct thread *td;
|
||||||
struct ufsmount *ump = NULL;
|
struct ufsmount *ump = NULL;
|
||||||
struct fs *fs;
|
struct fs *fs;
|
||||||
@ -246,6 +246,7 @@ ffs_mount(struct mount *mp)
|
|||||||
if (mp->mnt_flag & MNT_UPDATE) {
|
if (mp->mnt_flag & MNT_UPDATE) {
|
||||||
ump = VFSTOUFS(mp);
|
ump = VFSTOUFS(mp);
|
||||||
fs = ump->um_fs;
|
fs = ump->um_fs;
|
||||||
|
odevvp = ump->um_odevvp;
|
||||||
devvp = ump->um_devvp;
|
devvp = ump->um_devvp;
|
||||||
if (fsckpid == -1 && ump->um_fsckpid > 0) {
|
if (fsckpid == -1 && ump->um_fsckpid > 0) {
|
||||||
if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
|
if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
|
||||||
@ -337,16 +338,15 @@ ffs_mount(struct mount *mp)
|
|||||||
* If upgrade to read-write by non-root, then verify
|
* If upgrade to read-write by non-root, then verify
|
||||||
* that user has necessary permissions on the device.
|
* that user has necessary permissions on the device.
|
||||||
*/
|
*/
|
||||||
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
|
vn_lock(odevvp, LK_EXCLUSIVE | LK_RETRY);
|
||||||
error = VOP_ACCESS(devvp, VREAD | VWRITE,
|
error = VOP_ACCESS(odevvp, VREAD | VWRITE,
|
||||||
td->td_ucred, td);
|
td->td_ucred, td);
|
||||||
if (error)
|
if (error)
|
||||||
error = priv_check(td, PRIV_VFS_MOUNT_PERM);
|
error = priv_check(td, PRIV_VFS_MOUNT_PERM);
|
||||||
|
VOP_UNLOCK(odevvp);
|
||||||
if (error) {
|
if (error) {
|
||||||
VOP_UNLOCK(devvp);
|
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
VOP_UNLOCK(devvp);
|
|
||||||
fs->fs_flags &= ~FS_UNCLEAN;
|
fs->fs_flags &= ~FS_UNCLEAN;
|
||||||
if (fs->fs_clean == 0) {
|
if (fs->fs_clean == 0) {
|
||||||
fs->fs_flags |= FS_UNCLEAN;
|
fs->fs_flags |= FS_UNCLEAN;
|
||||||
@ -782,8 +782,8 @@ loop:
|
|||||||
* Common code for mount and mountroot
|
* Common code for mount and mountroot
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
ffs_mountfs(devvp, mp, td)
|
ffs_mountfs(odevvp, mp, td)
|
||||||
struct vnode *devvp;
|
struct vnode *odevvp;
|
||||||
struct mount *mp;
|
struct mount *mp;
|
||||||
struct thread *td;
|
struct thread *td;
|
||||||
{
|
{
|
||||||
@ -794,6 +794,7 @@ ffs_mountfs(devvp, mp, td)
|
|||||||
struct ucred *cred;
|
struct ucred *cred;
|
||||||
struct g_consumer *cp;
|
struct g_consumer *cp;
|
||||||
struct mount *nmp;
|
struct mount *nmp;
|
||||||
|
struct vnode *devvp;
|
||||||
int candelete, canspeedup;
|
int candelete, canspeedup;
|
||||||
off_t loc;
|
off_t loc;
|
||||||
|
|
||||||
@ -802,11 +803,13 @@ ffs_mountfs(devvp, mp, td)
|
|||||||
cred = td ? td->td_ucred : NOCRED;
|
cred = td ? td->td_ucred : NOCRED;
|
||||||
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
|
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
|
||||||
|
|
||||||
|
devvp = mntfs_allocvp(mp, odevvp);
|
||||||
|
VOP_UNLOCK(odevvp);
|
||||||
KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
|
KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
|
||||||
dev = devvp->v_rdev;
|
dev = devvp->v_rdev;
|
||||||
if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
|
if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
|
||||||
(uintptr_t)mp) == 0) {
|
(uintptr_t)mp) == 0) {
|
||||||
VOP_UNLOCK(devvp);
|
mntfs_freevp(devvp);
|
||||||
return (EBUSY);
|
return (EBUSY);
|
||||||
}
|
}
|
||||||
g_topology_lock();
|
g_topology_lock();
|
||||||
@ -814,12 +817,14 @@ ffs_mountfs(devvp, mp, td)
|
|||||||
g_topology_unlock();
|
g_topology_unlock();
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
|
atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
|
||||||
VOP_UNLOCK(devvp);
|
mntfs_freevp(devvp);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
dev_ref(dev);
|
dev_ref(dev);
|
||||||
devvp->v_bufobj.bo_ops = &ffs_ops;
|
devvp->v_bufobj.bo_ops = &ffs_ops;
|
||||||
VOP_UNLOCK(devvp);
|
BO_LOCK(&odevvp->v_bufobj);
|
||||||
|
odevvp->v_bufobj.bo_flag |= BO_NOBUFS;
|
||||||
|
BO_UNLOCK(&odevvp->v_bufobj);
|
||||||
if (dev->si_iosize_max != 0)
|
if (dev->si_iosize_max != 0)
|
||||||
mp->mnt_iosize_max = dev->si_iosize_max;
|
mp->mnt_iosize_max = dev->si_iosize_max;
|
||||||
if (mp->mnt_iosize_max > MAXPHYS)
|
if (mp->mnt_iosize_max > MAXPHYS)
|
||||||
@ -1020,6 +1025,7 @@ ffs_mountfs(devvp, mp, td)
|
|||||||
ump->um_mountp = mp;
|
ump->um_mountp = mp;
|
||||||
ump->um_dev = dev;
|
ump->um_dev = dev;
|
||||||
ump->um_devvp = devvp;
|
ump->um_devvp = devvp;
|
||||||
|
ump->um_odevvp = odevvp;
|
||||||
ump->um_nindir = fs->fs_nindir;
|
ump->um_nindir = fs->fs_nindir;
|
||||||
ump->um_bptrtodb = fs->fs_fsbtodb;
|
ump->um_bptrtodb = fs->fs_fsbtodb;
|
||||||
ump->um_seqinc = fs->fs_frag;
|
ump->um_seqinc = fs->fs_frag;
|
||||||
@ -1099,7 +1105,11 @@ out:
|
|||||||
free(ump, M_UFSMNT);
|
free(ump, M_UFSMNT);
|
||||||
mp->mnt_data = NULL;
|
mp->mnt_data = NULL;
|
||||||
}
|
}
|
||||||
|
BO_LOCK(&odevvp->v_bufobj);
|
||||||
|
odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
|
||||||
|
BO_UNLOCK(&odevvp->v_bufobj);
|
||||||
atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
|
atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
|
||||||
|
mntfs_freevp(devvp);
|
||||||
dev_rel(dev);
|
dev_rel(dev);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
@ -1304,8 +1314,12 @@ ffs_unmount(mp, mntflags)
|
|||||||
}
|
}
|
||||||
g_vfs_close(ump->um_cp);
|
g_vfs_close(ump->um_cp);
|
||||||
g_topology_unlock();
|
g_topology_unlock();
|
||||||
|
BO_LOCK(&ump->um_odevvp->v_bufobj);
|
||||||
|
ump->um_odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
|
||||||
|
BO_UNLOCK(&ump->um_odevvp->v_bufobj);
|
||||||
atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
|
atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
|
||||||
vrele(ump->um_devvp);
|
mntfs_freevp(ump->um_devvp);
|
||||||
|
vrele(ump->um_odevvp);
|
||||||
dev_rel(ump->um_dev);
|
dev_rel(ump->um_dev);
|
||||||
mtx_destroy(UFS_MTX(ump));
|
mtx_destroy(UFS_MTX(ump));
|
||||||
if (mp->mnt_gjprovider != NULL) {
|
if (mp->mnt_gjprovider != NULL) {
|
||||||
@ -2293,7 +2307,19 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
|
|||||||
struct buf *tbp;
|
struct buf *tbp;
|
||||||
int error, nocopy;
|
int error, nocopy;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the bufobj strategy for the private VCHR vnodes
|
||||||
|
* used by FFS to access the underlying storage device.
|
||||||
|
* We override the default bufobj strategy and thus bypass
|
||||||
|
* VOP_STRATEGY() for these vnodes.
|
||||||
|
*/
|
||||||
vp = bo2vnode(bo);
|
vp = bo2vnode(bo);
|
||||||
|
KASSERT(bp->b_vp == NULL || bp->b_vp->v_type != VCHR ||
|
||||||
|
bp->b_vp->v_rdev == NULL ||
|
||||||
|
bp->b_vp->v_rdev->si_mountpt == NULL ||
|
||||||
|
VFSTOUFS(bp->b_vp->v_rdev->si_mountpt) == NULL ||
|
||||||
|
vp == VFSTOUFS(bp->b_vp->v_rdev->si_mountpt)->um_devvp,
|
||||||
|
("ffs_geom_strategy() with wrong vp"));
|
||||||
if (bp->b_iocmd == BIO_WRITE) {
|
if (bp->b_iocmd == BIO_WRITE) {
|
||||||
if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
|
if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
|
||||||
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
|
bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
|
||||||
|
@ -83,7 +83,8 @@ struct ufsmount {
|
|||||||
struct cdev *um_dev; /* (r) device mounted */
|
struct cdev *um_dev; /* (r) device mounted */
|
||||||
struct g_consumer *um_cp; /* (r) GEOM access point */
|
struct g_consumer *um_cp; /* (r) GEOM access point */
|
||||||
struct bufobj *um_bo; /* (r) Buffer cache object */
|
struct bufobj *um_bo; /* (r) Buffer cache object */
|
||||||
struct vnode *um_devvp; /* (r) blk dev mounted vnode */
|
struct vnode *um_odevvp; /* (r) devfs dev vnode */
|
||||||
|
struct vnode *um_devvp; /* (r) mntfs private vnode */
|
||||||
u_long um_fstype; /* (c) type of filesystem */
|
u_long um_fstype; /* (c) type of filesystem */
|
||||||
struct fs *um_fs; /* (r) pointer to superblock */
|
struct fs *um_fs; /* (r) pointer to superblock */
|
||||||
struct ufs_extattr_per_mount um_extattr; /* (c) extended attrs */
|
struct ufs_extattr_per_mount um_extattr; /* (c) extended attrs */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user