From a5bfcc2aae7220e79e9e0a04fe7f4f7cd5a55ebb Mon Sep 17 00:00:00 2001 From: kib Date: Sun, 18 Aug 2019 20:36:11 +0000 Subject: [PATCH] Fix an issue with executing tmpfs binary. Suppose that a binary was executed from a tmpfs mount, and the text vnode was reclaimed while the binary was still running. This is possible even during normal operation, since a tmpfs vnode's vm_object has swap type and no references on the vnode are held. Also assume that the text vnode was later revived for some reason. Then, on process exit or exec, unmapping of the text mapping tries to remove the text reference from the vnode, but since the vnode went through a recycle/instantiation cycle, no text reference is recorded on it, and the assertion in VOP_UNSET_TEXT_CHECKED() triggers. Fix this by keeping a use reference on the tmpfs vnode while it has any text (exec) references. This prevents vnode reclamation while an executable map entry is active. Do it by adding a per-mount flag MNTK_TEXT_REFS, which directs vop_stdset_text() to add a use ref on the first text use of the vnode, and a per-vnode flag VI_TEXT_REF, which records that vop_stdunset_text() must drop that use ref when the last text use of the vnode goes away. Set MNTK_TEXT_REFS for tmpfs mounts. 
Reported by: bdrewery Tested by: sbruno, pho (previous version) Sponsored by: The FreeBSD Foundation MFC after: 1 week --- sys/fs/tmpfs/tmpfs_vfsops.c | 3 ++- sys/kern/vfs_default.c | 21 +++++++++++++++++++++ sys/sys/mount.h | 1 + sys/sys/vnode.h | 1 + 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c index 27d9d78d8fef..bea0d3ecc7c8 100644 --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -507,7 +507,8 @@ tmpfs_mount(struct mount *mp) MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; - mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED; + mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | + MNTK_TEXT_REFS; MNT_IUNLOCK(mp); mp->mnt_data = tmp; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 920d6f19c8a9..955fcfd78ddb 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1082,6 +1082,7 @@ int vop_stdset_text(struct vop_set_text_args *ap) { struct vnode *vp; + struct mount *mp; int error; vp = ap->a_vp; @@ -1089,6 +1090,17 @@ vop_stdset_text(struct vop_set_text_args *ap) if (vp->v_writecount > 0) { error = ETXTBSY; } else { + /* + * If requested by fs, keep a use reference to the + * vnode until the last text reference is released. 
+ */ + mp = vp->v_mount; + if (mp != NULL && (mp->mnt_kern_flag & MNTK_TEXT_REFS) != 0 && + vp->v_writecount == 0) { + vp->v_iflag |= VI_TEXT_REF; + vrefl(vp); + } + vp->v_writecount--; error = 0; } @@ -1101,16 +1113,25 @@ vop_stdunset_text(struct vop_unset_text_args *ap) { struct vnode *vp; int error; + bool last; vp = ap->a_vp; + last = false; VI_LOCK(vp); if (vp->v_writecount < 0) { + if ((vp->v_iflag & VI_TEXT_REF) != 0 && + vp->v_writecount == -1) { + last = true; + vp->v_iflag &= ~VI_TEXT_REF; + } vp->v_writecount++; error = 0; } else { error = EINVAL; } VI_UNLOCK(vp); + if (last) + vunref(vp); return (error); } diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 998538eadd47..0bd6d9928074 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -398,6 +398,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ +#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 7514837bc4a6..09740c9f721d 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -233,6 +233,7 @@ struct xvnode { * VI_DOOMED is doubly protected by the interlock and vnode lock. Both * are required for writing but the status may be checked with either. */ +#define VI_TEXT_REF 0x0001 /* Text ref grabbed use ref */ #define VI_MOUNT 0x0020 /* Mount in progress */ #define VI_DOOMED 0x0080 /* This vnode is being recycled */ #define VI_FREE 0x0100 /* This vnode is on the freelist */