From bcd5bb8e570800b8496a3612d1ec5fdb9282b813 Mon Sep 17 00:00:00 2001
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Sun, 9 Sep 2012 19:17:15 +0000
Subject: [PATCH] Add a facility for vgone() to inform the set of subscribed
 mounts about vnode reclamation.

Typical use is for bypass mounts like nullfs to get a notification
about a lower vnode going away.

Now, vgone() calls the new VFS op vfs_reclaim_lowervp() with the
vnode being reclaimed as the lowervp argument.  Several reclamation
event listeners may be registered, to correctly handle the case of
several nullfs mounts over the same directory.

For a filesystem without nullfs mounts over it, the added overhead is
a single mount interlock lock/unlock in the vnode reclamation path.

In collaboration with:	pho
MFC after:	3 weeks
---
 sys/kern/vfs_mount.c |  5 +++-
 sys/kern/vfs_subr.c  | 55 ++++++++++++++++++++++++++++++++++++++++++++
 sys/sys/mount.h      | 10 ++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index e9d3abe5476e..f39784269060 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -481,6 +481,7 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
 	mac_mount_create(cred, mp);
 #endif
 	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
+	TAILQ_INIT(&mp->mnt_uppers);
 	return (mp);
 }
 
@@ -514,6 +515,7 @@ vfs_mount_destroy(struct mount *mp)
 			vprint("", vp);
 		panic("unmount: dangling vnode");
 	}
+	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
 	if (mp->mnt_activevnodelistsize != 0)
@@ -1275,7 +1277,8 @@ dounmount(mp, flags, td)
 	}
 
 	MNT_ILOCK(mp);
-	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
+	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
+	    !TAILQ_EMPTY(&mp->mnt_uppers)) {
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 631d3f205d85..06d16c0d2afc 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2688,6 +2688,58 @@ vgone(struct vnode *vp)
 	VI_UNLOCK(vp);
 }
 
+static void
+vgonel_reclaim_lowervp_vfs(struct mount *mp __unused,
+    struct vnode *lowervp __unused)
+{
+}
+
+/*
+ * Notify upper mounts about the reclaimed vnode.
+ */
+static void
+vgonel_reclaim_lowervp(struct vnode *vp)
+{
+	static struct vfsops vgonel_vfsops = {
+		.vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs
+	};
+	struct mount *mp, *ump, *mmp;
+
+	mp = vp->v_mount;
+	if (mp == NULL)
+		return;
+
+	MNT_ILOCK(mp);
+	if (TAILQ_EMPTY(&mp->mnt_uppers))
+		goto unlock;
+	MNT_IUNLOCK(mp);
+	mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO);
+	mmp->mnt_op = &vgonel_vfsops;
+	mmp->mnt_kern_flag |= MNTK_MARKER;
+	MNT_ILOCK(mp);
+	mp->mnt_kern_flag |= MNTK_VGONE_UPPER;
+	for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) {
+		if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) {
+			ump = TAILQ_NEXT(ump, mnt_upper_link);
+			continue;
+		}
+		TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
+		MNT_IUNLOCK(mp);
+		VFS_RECLAIM_LOWERVP(ump, vp);
+		MNT_ILOCK(mp);
+		ump = TAILQ_NEXT(mmp, mnt_upper_link);
+		TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
+	}
+	free(mmp, M_TEMP);
+	mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER;
+	if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) {
+		mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER;
+		wakeup(&mp->mnt_uppers);
+	}
+unlock:
+	MNT_IUNLOCK(mp);
+}
+
 /*
  * vgone, with the vp interlock held.
  */
@@ -2712,6 +2764,7 @@ vgonel(struct vnode *vp)
 	if (vp->v_iflag & VI_DOOMED)
 		return;
 	vp->v_iflag |= VI_DOOMED;
+
 	/*
 	 * Check to see if the vnode is in use. If so, we have to call
 	 * VOP_CLOSE() and VOP_INACTIVE().
@@ -2719,6 +2772,8 @@ vgonel(struct vnode *vp)
 	active = vp->v_usecount;
 	oweinact = (vp->v_iflag & VI_OWEINACT);
 	VI_UNLOCK(vp);
+	vgonel_reclaim_lowervp(vp);
+
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 134b0c304794..833ac0f43420 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -188,6 +188,8 @@ struct mount {
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
+	TAILQ_ENTRY(mount) mnt_upper_link;	/* (m) link in lower's uppers */
+	TAILQ_HEAD(, mount) mnt_uppers;		/* (m) upper mounts over us */
 };
 
 /*
@@ -373,6 +375,9 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
 #define	MNTK_NO_IOPF	0x00000100 /* Disallow page faults during reads
 				      and writes. Filesystem shall properly
 				      handle i/o state on EFAULT. */
+#define	MNTK_VGONE_UPPER	0x00000200 /* vgone() is notifying uppers */
+#define	MNTK_VGONE_WAITER	0x00000400 /* waiter for upper notification */
 #define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
+#define	MNTK_MARKER		0x00001000 /* mnt_uppers iteration marker */
 #define	MNTK_NOASYNC	0x00800000	/* disable async */
 #define	MNTK_UNMOUNT	0x01000000	/* unmount in progress */
@@ -629,6 +634,7 @@ typedef int vfs_mount_t(struct mount *mp);
 typedef int	vfs_sysctl_t(struct mount *mp, fsctlop_t op,
 		    struct sysctl_req *req);
 typedef void	vfs_susp_clean_t(struct mount *mp);
+typedef void	vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp);
 
 struct vfsops {
 	vfs_mount_t		*vfs_mount;
@@ -646,6 +652,7 @@ struct vfsops {
 	vfs_extattrctl_t	*vfs_extattrctl;
 	vfs_sysctl_t		*vfs_sysctl;
 	vfs_susp_clean_t	*vfs_susp_clean;
+	vfs_reclaim_lowervp_t	*vfs_reclaim_lowervp;
 };
 
 vfs_statfs_t	__vfs_statfs;
@@ -671,6 +678,9 @@ vfs_statfs_t __vfs_statfs;
 #define	VFS_SUSP_CLEAN(MP) \
 	({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \
 		(*(MP)->mnt_op->vfs_susp_clean)(MP); })
+#define	VFS_RECLAIM_LOWERVP(MP, VP) \
+	({if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) \
+		(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); })
 
 #define VFS_NEEDSGIANT_(MP) \
 	((MP) != NULL && ((MP)->mnt_kern_flag & MNTK_MPSAFE) == 0)
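
For context only, not part of this commit: below is a minimal sketch of how
an upper filesystem such as nullfs could consume the new hook.  It subscribes
itself on the lower mount's mnt_uppers list and implements vfs_reclaim_lowervp
so that its alias vnode is doomed together with the lower vnode.  The names
example_register_upper() and example_hashget() are illustrative assumptions
only; the real nullfs wiring is done in a separate commit.

/* Subscribe the upper mount to lower-vnode reclamation events. */
static void
example_register_upper(struct mount *upper_mp, struct mount *lower_mp)
{

	MNT_ILOCK(lower_mp);
	TAILQ_INSERT_TAIL(&lower_mp->mnt_uppers, upper_mp, mnt_upper_link);
	MNT_IUNLOCK(lower_mp);
}

/*
 * Called from vgone() via VFS_RECLAIM_LOWERVP() for every subscribed
 * upper mount.  Look up the upper vnode stacked over lowervp, if any,
 * and doom it so that no alias outlives the reclaimed lower vnode.
 * example_hashget() is an assumed helper that returns the upper vnode
 * referenced and locked, or NULL if none exists.
 */
static void
example_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
{
	struct vnode *vp;

	vp = example_hashget(mp, lowervp);
	if (vp == NULL)
		return;
	vgone(vp);
	vput(vp);
}

static struct vfsops example_vfsops = {
	/* ... the usual vfs_mount, vfs_unmount, etc. ... */
	.vfs_reclaim_lowervp =	example_reclaim_lowervp,
};

On the design: the MNTK_MARKER pseudo-mount that vgonel_reclaim_lowervp()
inserts into mnt_uppers keeps a stable iteration point, so the mount
interlock can be dropped around each VFS_RECLAIM_LOWERVP() call.  In
addition, dounmount() refuses to unmount a filesystem whose mnt_uppers
list is non-empty, so a subscribed lower mount cannot disappear underneath
its uppers.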