2005-01-06 18:10:42 +00:00
|
|
|
/*-
|
1997-02-10 02:22:35 +00:00
|
|
|
* Copyright (c) 1992, 1993, 1995
|
1994-05-24 10:09:53 +00:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* This code is derived from software donated to Berkeley by
|
|
|
|
* Jan-Simon Pendry.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
|
|
|
|
*
|
|
|
|
* @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
|
1999-08-28 01:08:13 +00:00
|
|
|
* $FreeBSD$
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Null Layer
|
|
|
|
* (See null_vnops.c for a description of what this does.)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
2008-03-31 12:01:21 +00:00
|
|
|
#include <sys/fcntl.h>
|
1997-02-12 17:47:28 +00:00
|
|
|
#include <sys/kernel.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/lock.h>
|
1997-10-12 20:26:33 +00:00
|
|
|
#include <sys/malloc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <sys/namei.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/proc.h>
|
|
|
|
#include <sys/vnode.h>
|
2012-02-23 18:51:24 +00:00
|
|
|
#include <sys/jail.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
|
2001-05-23 09:42:29 +00:00
|
|
|
#include <fs/nullfs/null.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2005-10-31 15:41:29 +00:00
|
|
|
/* Allocation type tag passed to malloc()/free() for struct null_mount. */
static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
|
1997-10-12 20:26:33 +00:00
|
|
|
|
2003-06-12 20:48:38 +00:00
|
|
|
/*
 * Prototypes for the nullfs VFS operations, declared through the
 * vfs_*_t function typedefs so that each definition is checked against
 * the signature of the corresponding VFS operation.
 */
static vfs_fhtovp_t nullfs_fhtovp;
|
2004-07-30 22:08:52 +00:00
|
|
|
static vfs_mount_t nullfs_mount;
|
2003-06-12 20:48:38 +00:00
|
|
|
static vfs_quotactl_t nullfs_quotactl;
|
|
|
|
static vfs_root_t nullfs_root;
|
|
|
|
static vfs_sync_t nullfs_sync;
|
|
|
|
static vfs_statfs_t nullfs_statfs;
|
|
|
|
static vfs_unmount_t nullfs_unmount;
|
|
|
|
static vfs_vget_t nullfs_vget;
|
|
|
|
static vfs_extattrctl_t nullfs_extattrctl;
|
1995-12-03 14:54:48 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Mount null layer
|
|
|
|
*/
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_mount(struct mount *mp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
int error = 0;
|
|
|
|
struct vnode *lowerrootvp, *vp;
|
|
|
|
struct vnode *nullm_rootvp;
|
|
|
|
struct null_mount *xmp;
|
2012-02-23 18:51:24 +00:00
|
|
|
struct thread *td = curthread;
|
2002-05-23 23:07:27 +00:00
|
|
|
char *target;
|
|
|
|
int isvnunlocked = 0, len;
|
2004-07-30 22:08:52 +00:00
|
|
|
struct nameidata nd, *ndp = &nd;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2012-02-23 18:51:24 +00:00
|
|
|
if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_NULLFS))
|
|
|
|
return (EPERM);
|
2004-11-09 22:21:10 +00:00
|
|
|
if (mp->mnt_flag & MNT_ROOTFS)
|
|
|
|
return (EOPNOTSUPP);
|
2013-01-03 19:17:57 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Update is a no-op
|
|
|
|
*/
|
|
|
|
if (mp->mnt_flag & MNT_UPDATE) {
|
2006-05-28 20:09:18 +00:00
|
|
|
/*
|
|
|
|
* Only support update mounts for NFS export.
|
|
|
|
*/
|
|
|
|
if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
|
|
|
|
return (0);
|
|
|
|
else
|
|
|
|
return (EOPNOTSUPP);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get argument
|
|
|
|
*/
|
2002-05-23 23:07:27 +00:00
|
|
|
error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
|
|
|
|
if (error || target[len - 1] != '\0')
|
|
|
|
return (EINVAL);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-04-17 11:24:57 +00:00
|
|
|
/*
|
2011-10-24 10:35:37 +00:00
|
|
|
* Unlock lower node to avoid possible deadlock.
|
1997-04-17 11:24:57 +00:00
|
|
|
*/
|
2004-12-01 23:16:38 +00:00
|
|
|
if ((mp->mnt_vnodecovered->v_op == &null_vnodeops) &&
|
2011-10-24 10:35:37 +00:00
|
|
|
VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(mp->mnt_vnodecovered, 0);
|
1997-04-17 11:24:57 +00:00
|
|
|
isvnunlocked = 1;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Find lower node
|
|
|
|
*/
|
2009-05-11 15:33:26 +00:00
|
|
|
NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
|
1994-10-10 07:55:48 +00:00
|
|
|
error = namei(ndp);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
|
1997-04-17 11:24:57 +00:00
|
|
|
/*
|
|
|
|
* Re-lock vnode.
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
* XXXKIB This is deadlock-prone as well.
|
1997-04-17 11:24:57 +00:00
|
|
|
*/
|
2011-10-24 13:48:13 +00:00
|
|
|
if (isvnunlocked)
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
|
1997-04-17 11:24:57 +00:00
|
|
|
|
1994-10-10 07:55:48 +00:00
|
|
|
if (error)
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
1999-12-15 23:02:35 +00:00
|
|
|
NDFREE(ndp, NDF_ONLY_PNBUF);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check on lower vnode
|
|
|
|
*/
|
|
|
|
lowerrootvp = ndp->ni_vp;
|
|
|
|
|
1997-04-17 11:24:57 +00:00
|
|
|
/*
|
|
|
|
* Check multi null mount to avoid `lock against myself' panic.
|
|
|
|
*/
|
|
|
|
if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_mount: multi null mount?\n");
|
2000-10-22 16:15:12 +00:00
|
|
|
vput(lowerrootvp);
|
1997-04-19 06:04:13 +00:00
|
|
|
return (EDEADLK);
|
1997-04-17 11:24:57 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
|
2013-01-03 19:17:57 +00:00
|
|
|
M_NULLFSMNT, M_WAITOK | M_ZERO);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Save reference to underlying FS
|
|
|
|
*/
|
|
|
|
xmp->nullm_vfs = lowerrootvp->v_mount;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save reference. Each mount also holds
|
|
|
|
* a reference on the root vnode.
|
|
|
|
*/
|
2002-06-13 21:49:09 +00:00
|
|
|
error = null_nodeget(mp, lowerrootvp, &vp);
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Make sure the node alias worked
|
|
|
|
*/
|
|
|
|
if (error) {
|
2012-01-03 21:09:07 +00:00
|
|
|
free(xmp, M_NULLFSMNT);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep a held reference to the root vnode.
|
|
|
|
* It is vrele'd in nullfs_unmount.
|
|
|
|
*/
|
|
|
|
nullm_rootvp = vp;
|
2002-08-04 10:29:36 +00:00
|
|
|
nullm_rootvp->v_vflag |= VV_ROOT;
|
1994-05-24 10:09:53 +00:00
|
|
|
xmp->nullm_rootvp = nullm_rootvp;
|
2002-09-25 02:28:07 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlock the node (either the lower or the alias)
|
|
|
|
*/
|
2008-01-13 14:44:15 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2002-09-25 02:28:07 +00:00
|
|
|
|
2006-09-26 04:12:49 +00:00
|
|
|
if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
|
|
|
|
MNT_ILOCK(mp);
|
1994-05-24 10:09:53 +00:00
|
|
|
mp->mnt_flag |= MNT_LOCAL;
|
2006-09-26 04:12:49 +00:00
|
|
|
MNT_IUNLOCK(mp);
|
|
|
|
}
|
2013-01-03 19:17:57 +00:00
|
|
|
|
|
|
|
xmp->nullm_flags |= NULLM_CACHE;
|
|
|
|
if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0)
|
|
|
|
xmp->nullm_flags &= ~NULLM_CACHE;
|
|
|
|
|
2006-09-26 04:12:49 +00:00
|
|
|
MNT_ILOCK(mp);
|
2013-01-03 19:17:57 +00:00
|
|
|
if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
|
|
|
|
mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
|
|
|
|
(MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
|
|
|
|
MNTK_EXTENDED_SHARED);
|
|
|
|
}
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
|
2006-09-26 04:12:49 +00:00
|
|
|
MNT_IUNLOCK(mp);
|
2012-03-13 10:04:13 +00:00
|
|
|
mp->mnt_data = xmp;
|
1997-02-10 02:22:35 +00:00
|
|
|
vfs_getnewfsid(mp);
|
2013-01-03 19:17:57 +00:00
|
|
|
if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
|
|
|
|
MNT_ILOCK(xmp->nullm_vfs);
|
|
|
|
TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
|
|
|
|
mnt_upper_link);
|
|
|
|
MNT_IUNLOCK(xmp->nullm_vfs);
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-12-06 20:02:13 +00:00
|
|
|
vfs_mountedfrom(mp, target);
|
|
|
|
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
|
1994-05-24 10:09:53 +00:00
|
|
|
mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free reference to null layer
|
|
|
|
*/
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_unmount(mp, mntflags)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
|
|
|
int mntflags;
|
|
|
|
{
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
struct null_mount *mntdata;
|
|
|
|
struct mount *ump;
|
|
|
|
int error, flags;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1997-02-10 02:22:35 +00:00
|
|
|
if (mntflags & MNT_FORCE)
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
flags = FORCECLOSE;
|
|
|
|
else
|
|
|
|
flags = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2001-05-16 18:04:37 +00:00
|
|
|
/* There is 1 extra root vnode reference (nullm_rootvp). */
|
2009-05-11 15:33:26 +00:00
|
|
|
error = vflush(mp, 1, flags, curthread);
|
1994-10-10 07:55:48 +00:00
|
|
|
if (error)
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Finally, throw away the null_mount structure
|
|
|
|
*/
|
2000-09-05 09:02:07 +00:00
|
|
|
mntdata = mp->mnt_data;
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
ump = mntdata->nullm_vfs;
|
2013-01-03 19:17:57 +00:00
|
|
|
if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
|
|
|
|
MNT_ILOCK(ump);
|
|
|
|
while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) {
|
|
|
|
ump->mnt_kern_flag |= MNTK_VGONE_WAITER;
|
|
|
|
msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0);
|
|
|
|
}
|
|
|
|
TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
|
|
|
|
MNT_IUNLOCK(ump);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
}
|
2012-03-13 10:04:13 +00:00
|
|
|
mp->mnt_data = NULL;
|
2000-09-25 15:38:32 +00:00
|
|
|
free(mntdata, M_NULLFSMNT);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
return (0);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_root(mp, flags, vpp)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
2005-03-24 07:36:16 +00:00
|
|
|
int flags;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct vnode **vpp;
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
|
1998-07-30 17:40:45 +00:00
|
|
|
(void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
|
|
|
|
(void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Return locked reference to root.
|
|
|
|
*/
|
|
|
|
vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
|
|
|
|
VREF(vp);
|
2000-09-25 15:38:32 +00:00
|
|
|
|
2011-10-24 13:56:31 +00:00
|
|
|
ASSERT_VOP_UNLOCKED(vp, "root vnode is locked");
|
2008-01-10 01:10:58 +00:00
|
|
|
vn_lock(vp, flags | LK_RETRY);
|
1994-05-24 10:09:53 +00:00
|
|
|
*vpp = vp;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_quotactl(mp, cmd, uid, arg)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
|
|
|
int cmd;
|
|
|
|
uid_t uid;
|
2005-12-14 00:49:52 +00:00
|
|
|
void *arg;
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2009-05-11 15:33:26 +00:00
|
|
|
return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_statfs(mp, sbp)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
|
|
|
struct statfs *sbp;
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
struct statfs mstat;
|
|
|
|
|
2000-09-05 09:02:07 +00:00
|
|
|
NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
|
1998-07-30 17:40:45 +00:00
|
|
|
(void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
|
|
|
|
(void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
bzero(&mstat, sizeof(mstat));
|
|
|
|
|
2009-05-11 15:33:26 +00:00
|
|
|
error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/* now copy across the "interesting" information and fake the rest */
|
|
|
|
sbp->f_type = mstat.f_type;
|
2013-03-02 12:42:23 +00:00
|
|
|
sbp->f_flags = (sbp->f_flags & (MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID |
|
|
|
|
MNT_UNION | MNT_NOSYMFOLLOW)) | (mstat.f_flags & ~MNT_ROOTFS);
|
1994-05-24 10:09:53 +00:00
|
|
|
sbp->f_bsize = mstat.f_bsize;
|
|
|
|
sbp->f_iosize = mstat.f_iosize;
|
|
|
|
sbp->f_blocks = mstat.f_blocks;
|
|
|
|
sbp->f_bfree = mstat.f_bfree;
|
|
|
|
sbp->f_bavail = mstat.f_bavail;
|
|
|
|
sbp->f_files = mstat.f_files;
|
|
|
|
sbp->f_ffree = mstat.f_ffree;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
/*
 * Sync operation for nullfs: does nothing and always returns 0.
 * Both arguments are ignored.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{
	/*
	 * XXX - Assumes no data cached at null layer.
	 */
	return (0);
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2002-03-17 01:25:47 +00:00
|
|
|
nullfs_vget(mp, ino, flags, vpp)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
|
|
|
ino_t ino;
|
2002-03-17 01:25:47 +00:00
|
|
|
int flags;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct vnode **vpp;
|
|
|
|
{
|
2000-09-25 15:38:32 +00:00
|
|
|
int error;
|
2012-02-29 15:09:20 +00:00
|
|
|
|
|
|
|
KASSERT((flags & LK_TYPE_MASK) != 0,
|
|
|
|
("nullfs_vget: no lock requested"));
|
|
|
|
|
2002-03-17 01:25:47 +00:00
|
|
|
error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
if (error != 0)
|
2000-09-25 15:38:32 +00:00
|
|
|
return (error);
|
2002-06-13 21:49:09 +00:00
|
|
|
return (null_nodeget(mp, *vpp, vpp));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static int
|
2011-05-22 01:07:54 +00:00
|
|
|
nullfs_fhtovp(mp, fidp, flags, vpp)
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mount *mp;
|
|
|
|
struct fid *fidp;
|
2011-05-22 01:07:54 +00:00
|
|
|
int flags;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct vnode **vpp;
|
|
|
|
{
|
2000-09-25 15:38:32 +00:00
|
|
|
int error;
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
|
|
|
|
error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
|
2011-05-22 01:07:54 +00:00
|
|
|
vpp);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
if (error != 0)
|
2000-09-25 15:38:32 +00:00
|
|
|
return (error);
|
2002-06-13 21:49:09 +00:00
|
|
|
return (null_nodeget(mp, *vpp, vpp));
|
1999-09-11 00:46:08 +00:00
|
|
|
}
|
|
|
|
|
1999-12-19 06:08:07 +00:00
|
|
|
static int
|
2009-05-11 15:33:26 +00:00
|
|
|
nullfs_extattrctl(mp, cmd, filename_vp, namespace, attrname)
|
1999-12-19 06:08:07 +00:00
|
|
|
struct mount *mp;
|
|
|
|
int cmd;
|
o Change the API and ABI of the Extended Attribute kernel interfaces to
introduce a new argument, "namespace", rather than relying on a first-
character namespace indicator. This is in line with more recent
thinking on EA interfaces on various mailing lists, including the
posix1e, Linux acl-devel, and trustedbsd-discuss forums. Two namespaces
are defined by default, EXTATTR_NAMESPACE_SYSTEM and
EXTATTR_NAMESPACE_USER, where the primary distinction lies in the
access control model: user EAs are accessible based on the normal
MAC and DAC file/directory protections, and system attributes are
limited to kernel-originated or appropriately privileged userland
requests.
o These API changes occur at several levels: the namespace argument is
introduced in the extattr_{get,set}_file() system call interfaces,
at the vnode operation level in the vop_{get,set}extattr() interfaces,
and in the UFS extended attribute implementation. Changes are also
introduced in the VFS extattrctl() interface (system call, VFS,
and UFS implementation), where the arguments are modified to include
a namespace field, as well as modified to advoid direct access to
userspace variables from below the VFS layer (in the style of recent
changes to mount by adrian@FreeBSD.org). This required some cleanup
and bug fixing regarding VFS locks and the VFS interface, as a vnode
pointer may now be optionally submitted to the VFS_EXTATTRCTL()
call. Updated documentation for the VFS interface will be committed
shortly.
o In the near future, the auto-starting feature will be updated to
search two sub-directories to the ".attribute" directory in appropriate
file systems: "user" and "system" to locate attributes intended for
those namespaces, as the single filename is no longer sufficient
to indicate what namespace the attribute is intended for. Until this
is committed, all attributes auto-started by UFS will be placed in
the EXTATTR_NAMESPACE_SYSTEM namespace.
o The default POSIX.1e attribute names for ACLs and Capabilities have
been updated to no longer include the '$' in their filename. As such,
if you're using these features, you'll need to rename the attribute
backing files to the same names without '$' symbols in front.
o Note that these changes will require changes in userland, which will
be committed shortly. These include modifications to the extended
attribute utilities, as well as to libutil for new namespace
string conversion routines. Once the matching userland changes are
committed, a buildworld is recommended to update all the necessary
include files and verify that the kernel and userland environments
are in sync. Note: If you do not use extended attributes (most people
won't), upgrading is not imperative although since the system call
API has changed, the new userland extended attribute code will no longer
compile with old include files.
o Couple of minor cleanups while I'm there: make more code compilation
conditional on FFS_EXTATTR, which should recover a bit of space on
kernels running without EA's, as well as update copyright dates.
Obtained from: TrustedBSD Project
2001-03-15 02:54:29 +00:00
|
|
|
struct vnode *filename_vp;
|
|
|
|
int namespace;
|
2000-01-19 06:07:34 +00:00
|
|
|
const char *attrname;
|
1999-12-19 06:08:07 +00:00
|
|
|
{
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
|
|
|
|
return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
|
|
|
|
filename_vp, namespace, attrname));
|
1999-12-19 06:08:07 +00:00
|
|
|
}
|
|
|
|
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which must be addressed for shared
lookups to have a measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode from becoming stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that the nullfs reclamation procedure calls vput() on the lowervp
vnode, temporarily unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but non-MPSAFE code often puts a partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
static void
|
|
|
|
nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
|
|
|
|
vp = null_hashget(mp, lowervp);
|
|
|
|
if (vp == NULL)
|
|
|
|
return;
|
2013-05-11 11:17:44 +00:00
|
|
|
VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
vgone(vp);
|
2013-05-11 11:17:44 +00:00
|
|
|
vput(vp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
|
|
|
|
{
|
|
|
|
struct vnode *vp;
|
|
|
|
struct null_node *xp;
|
|
|
|
|
|
|
|
vp = null_hashget(mp, lowervp);
|
|
|
|
if (vp == NULL)
|
|
|
|
return;
|
|
|
|
xp = VTONULL(vp);
|
|
|
|
xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
|
|
|
|
vhold(vp);
|
|
|
|
vunref(vp);
|
|
|
|
|
|
|
|
if (vp->v_usecount == 0) {
|
2013-05-21 11:31:56 +00:00
|
|
|
/*
|
|
|
|
* If vunref() dropped the last use reference on the
|
|
|
|
* nullfs vnode, it must be reclaimed, and its lock
|
|
|
|
* was split from the lower vnode lock. Need to do
|
|
|
|
* extra unlock before allowing the final vdrop() to
|
|
|
|
* free the vnode.
|
|
|
|
*/
|
2013-05-11 11:17:44 +00:00
|
|
|
KASSERT((vp->v_iflag & VI_DOOMED) != 0,
|
2013-05-21 11:31:56 +00:00
|
|
|
("not reclaimed nullfs vnode %p", vp));
|
2013-05-11 11:17:44 +00:00
|
|
|
VOP_UNLOCK(vp, 0);
|
2013-05-21 11:31:56 +00:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Otherwise, the nullfs vnode still shares the lock
|
|
|
|
* with the lower vnode, and must not be unlocked.
|
|
|
|
* Also clear the NULLV_NOUNLOCK, the flag is not
|
|
|
|
* relevant for future reclamations.
|
|
|
|
*/
|
|
|
|
ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
|
|
|
|
KASSERT((vp->v_iflag & VI_DOOMED) == 0,
|
|
|
|
("reclaimed nullfs vnode %p", vp));
|
|
|
|
xp->null_flags &= ~NULLV_NOUNLOCK;
|
2013-05-11 11:17:44 +00:00
|
|
|
}
|
|
|
|
vdrop(vp);
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
}
|
1999-12-19 06:08:07 +00:00
|
|
|
|
1995-12-11 09:24:58 +00:00
|
|
|
static struct vfsops null_vfsops = {
|
2003-06-12 20:48:38 +00:00
|
|
|
.vfs_extattrctl = nullfs_extattrctl,
|
|
|
|
.vfs_fhtovp = nullfs_fhtovp,
|
|
|
|
.vfs_init = nullfs_init,
|
2004-07-30 22:08:52 +00:00
|
|
|
.vfs_mount = nullfs_mount,
|
2003-06-12 20:48:38 +00:00
|
|
|
.vfs_quotactl = nullfs_quotactl,
|
|
|
|
.vfs_root = nullfs_root,
|
|
|
|
.vfs_statfs = nullfs_statfs,
|
|
|
|
.vfs_sync = nullfs_sync,
|
|
|
|
.vfs_uninit = nullfs_uninit,
|
|
|
|
.vfs_unmount = nullfs_unmount,
|
|
|
|
.vfs_vget = nullfs_vget,
|
Allow shared lookups for nullfs mounts, if lower filesystem supports
it. There are two problems which shall be addressed for shared
lookups use to have measurable effect on nullfs scalability:
1. When vfs_lookup() calls VOP_LOOKUP() for nullfs, which passes lookup
operation to lower fs, resulting vnode is often only shared-locked. Then
null_nodeget() cannot instantiate covering vnode for lower vnode, since
insmntque1() and null_hashins() require exclusive lock on the lower.
Change the assert that lower vnode is exclusively locked to only
require any lock. If null hash failed to find pre-existing nullfs
vnode for lower vnode and the vnode is shared-locked, the lower vnode
lock is upgraded.
2. Nullfs reclaims its vnodes on deactivation. This is due to nullfs
inability to detect reclamation of the lower vnode. Reclamation of a
nullfs vnode at deactivation time prevents a reference to the lower
vnode to become stale.
Change nullfs VOP_INACTIVE to not reclaim the vnode, instead use the
VFS_RECLAIM_LOWERVP to get notification and reclaim upper vnode
together with the reclamation of the lower vnode.
Note that nullfs reclamation procedure calls vput() on the lowervp
vnode, temporary unlocking the vnode being reclaimed. This seems to be
fine for MPSAFE filesystems, but not-MPSAFE code often put partially
initialized vnode on some globally visible list, and later can decide
that half-constructed vnode is not needed. If nullfs mount is created
above such filesystem, then other threads might catch such not
properly initialized vnode. Instead of trying to overcome this case,
e.g. by recursing the lower vnode lock in null_reclaim_lowervp(), I
decided to rely on nearby removal of the support for non-MPSAFE
filesystems.
In collaboration with: pho
MFC after: 3 weeks
2012-09-09 19:20:23 +00:00
|
|
|
.vfs_reclaim_lowervp = nullfs_reclaim_lowervp,
|
2013-05-11 11:17:44 +00:00
|
|
|
.vfs_unlink_lowervp = nullfs_unlink_lowervp,
|
1994-05-24 10:09:53 +00:00
|
|
|
};
|
1994-09-21 03:47:43 +00:00
|
|
|
|
2012-02-09 10:39:01 +00:00
|
|
|
/*
 * Hook null_vfsops up under the name "nullfs" with the VFCF_LOOPBACK
 * and VFCF_JAIL flags.  NOTE(review): the exact registration semantics
 * come from the VFS_SET() macro, which is defined outside this file.
 */
VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
|