tmpfs: plug holes on rw->ro mount update.
In particular: - suspend the mount around vflush() to avoid new writes come after the vnode is processed; - flush pending metadata updates (mostly node times); - remap all rw mappings of files from the mount into ro. It is not clear to me how to handle writeable mappings on rw->ro for tmpfs best. Other filesystems, which use vnode vm object, call vgone() on vnodes with writers, which sets the vm object type to OBJT_DEAD, and keep the resident pages and installed ptes as is. In particular, the existing mappings continue to work as far as application only accesses resident pages, but changes are not flushed to file. For tmpfs the vm object of VREG vnodes also serves as the data pages container, giving single copy of the mapped pages, so it cannot be set to OBJT_DEAD. Alternatives for making rw mappings ro could be either invalidating them at all, or marking as CoW. Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential revision: https://reviews.freebsd.org/D19737
This commit is contained in:
parent
e1cdc30faa
commit
5c4ce6fac2
@ -60,10 +60,15 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_param.h>
|
||||
|
||||
@ -137,6 +142,227 @@ tmpfs_node_fini(void *mem, int size)
|
||||
mtx_destroy(&node->tn_interlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle updates of time from writes to mmaped regions. Use
|
||||
* MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_ACTIVE, since
|
||||
* unmap of the tmpfs-backed vnode does not call vinactive(), due to
|
||||
* vm object type is OBJT_SWAP.
|
||||
* If lazy, only handle delayed update of mtime due to the writes to
|
||||
* mapped files.
|
||||
*/
|
||||
static void
|
||||
tmpfs_update_mtime(struct mount *mp, bool lazy)
|
||||
{
|
||||
struct vnode *vp, *mvp;
|
||||
struct vm_object *obj;
|
||||
|
||||
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
|
||||
if (vp->v_type != VREG) {
|
||||
VI_UNLOCK(vp);
|
||||
continue;
|
||||
}
|
||||
obj = vp->v_object;
|
||||
KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
|
||||
(OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));
|
||||
|
||||
/*
|
||||
* In lazy case, do unlocked read, avoid taking vnode
|
||||
* lock if not needed. Lost update will be handled on
|
||||
* the next call.
|
||||
* For non-lazy case, we must flush all pending
|
||||
* metadata changes now.
|
||||
*/
|
||||
if (!lazy || (obj->flags & OBJ_TMPFS_DIRTY) != 0) {
|
||||
if (vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK,
|
||||
curthread) != 0)
|
||||
continue;
|
||||
tmpfs_check_mtime(vp);
|
||||
if (!lazy)
|
||||
tmpfs_update(vp);
|
||||
vput(vp);
|
||||
} else {
|
||||
VI_UNLOCK(vp);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct tmpfs_check_rw_maps_arg {
|
||||
bool found;
|
||||
};
|
||||
|
||||
static bool
|
||||
tmpfs_check_rw_maps_cb(struct mount *mp __unused, vm_map_t map __unused,
|
||||
vm_map_entry_t entry __unused, void *arg)
|
||||
{
|
||||
struct tmpfs_check_rw_maps_arg *a;
|
||||
|
||||
a = arg;
|
||||
a->found = true;
|
||||
return (true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Revoke write permissions from all mappings of regular files
|
||||
* belonging to the specified tmpfs mount.
|
||||
*/
|
||||
static bool
|
||||
tmpfs_revoke_rw_maps_cb(struct mount *mp __unused, vm_map_t map,
|
||||
vm_map_entry_t entry, void *arg __unused)
|
||||
{
|
||||
|
||||
/*
|
||||
* XXXKIB: might be invalidate the mapping
|
||||
* instead ? The process is not going to be
|
||||
* happy in any case.
|
||||
*/
|
||||
entry->max_protection &= ~VM_PROT_WRITE;
|
||||
if ((entry->protection & VM_PROT_WRITE) != 0) {
|
||||
entry->protection &= ~VM_PROT_WRITE;
|
||||
pmap_protect(map->pmap, entry->start, entry->end,
|
||||
entry->protection);
|
||||
}
|
||||
return (false);
|
||||
}
|
||||
|
||||
static void
|
||||
tmpfs_all_rw_maps(struct mount *mp, bool (*cb)(struct mount *mp, vm_map_t,
|
||||
vm_map_entry_t, void *), void *cb_arg)
|
||||
{
|
||||
struct proc *p;
|
||||
struct vmspace *vm;
|
||||
vm_map_t map;
|
||||
vm_map_entry_t entry;
|
||||
vm_object_t object;
|
||||
struct vnode *vp;
|
||||
int gen;
|
||||
bool terminate;
|
||||
|
||||
terminate = false;
|
||||
sx_slock(&allproc_lock);
|
||||
again:
|
||||
gen = allproc_gen;
|
||||
FOREACH_PROC_IN_SYSTEM(p) {
|
||||
PROC_LOCK(p);
|
||||
if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
|
||||
P_SYSTEM | P_WEXIT)) != 0) {
|
||||
PROC_UNLOCK(p);
|
||||
continue;
|
||||
}
|
||||
vm = vmspace_acquire_ref(p);
|
||||
_PHOLD_LITE(p);
|
||||
PROC_UNLOCK(p);
|
||||
if (vm == NULL) {
|
||||
PRELE(p);
|
||||
continue;
|
||||
}
|
||||
sx_sunlock(&allproc_lock);
|
||||
map = &vm->vm_map;
|
||||
|
||||
vm_map_lock(map);
|
||||
if (map->busy)
|
||||
vm_map_wait_busy(map);
|
||||
for (entry = map->header.next; entry != &map->header;
|
||||
entry = entry->next) {
|
||||
if ((entry->eflags & (MAP_ENTRY_GUARD |
|
||||
MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_COW)) != 0 ||
|
||||
(entry->max_protection & VM_PROT_WRITE) == 0)
|
||||
continue;
|
||||
object = entry->object.vm_object;
|
||||
if (object == NULL || object->type != OBJT_SWAP ||
|
||||
(object->flags & OBJ_TMPFS_NODE) == 0)
|
||||
continue;
|
||||
/*
|
||||
* No need to dig into shadow chain, mapping
|
||||
* of the object not at top is readonly.
|
||||
*/
|
||||
|
||||
VM_OBJECT_RLOCK(object);
|
||||
if (object->type == OBJT_DEAD) {
|
||||
VM_OBJECT_RUNLOCK(object);
|
||||
continue;
|
||||
}
|
||||
MPASS(object->ref_count > 1);
|
||||
if ((object->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) !=
|
||||
(OBJ_TMPFS_NODE | OBJ_TMPFS)) {
|
||||
VM_OBJECT_RUNLOCK(object);
|
||||
continue;
|
||||
}
|
||||
vp = object->un_pager.swp.swp_tmpfs;
|
||||
if (vp->v_mount != mp) {
|
||||
VM_OBJECT_RUNLOCK(object);
|
||||
continue;
|
||||
}
|
||||
|
||||
terminate = cb(mp, map, entry, cb_arg);
|
||||
VM_OBJECT_RUNLOCK(object);
|
||||
if (terminate)
|
||||
break;
|
||||
}
|
||||
vm_map_unlock(map);
|
||||
|
||||
vmspace_free(vm);
|
||||
sx_slock(&allproc_lock);
|
||||
PRELE(p);
|
||||
if (terminate)
|
||||
break;
|
||||
}
|
||||
if (!terminate && gen != allproc_gen)
|
||||
goto again;
|
||||
sx_sunlock(&allproc_lock);
|
||||
}
|
||||
|
||||
static bool
|
||||
tmpfs_check_rw_maps(struct mount *mp)
|
||||
{
|
||||
struct tmpfs_check_rw_maps_arg ca;
|
||||
|
||||
ca.found = false;
|
||||
tmpfs_all_rw_maps(mp, tmpfs_check_rw_maps_cb, &ca);
|
||||
return (ca.found);
|
||||
}
|
||||
|
||||
static int
|
||||
tmpfs_rw_to_ro(struct mount *mp)
|
||||
{
|
||||
int error, flags;
|
||||
bool forced;
|
||||
|
||||
forced = (mp->mnt_flag & MNT_FORCE) != 0;
|
||||
flags = WRITECLOSE | (forced ? FORCECLOSE : 0);
|
||||
|
||||
if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
|
||||
return (error);
|
||||
error = vfs_write_suspend_umnt(mp);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
if (!forced && tmpfs_check_rw_maps(mp)) {
|
||||
error = EBUSY;
|
||||
goto out;
|
||||
}
|
||||
VFS_TO_TMPFS(mp)->tm_ronly = 1;
|
||||
MNT_ILOCK(mp);
|
||||
mp->mnt_flag |= MNT_RDONLY;
|
||||
MNT_IUNLOCK(mp);
|
||||
for (;;) {
|
||||
tmpfs_all_rw_maps(mp, tmpfs_revoke_rw_maps_cb, NULL);
|
||||
tmpfs_update_mtime(mp, false);
|
||||
error = vflush(mp, 0, flags, curthread);
|
||||
if (error != 0) {
|
||||
VFS_TO_TMPFS(mp)->tm_ronly = 0;
|
||||
MNT_ILOCK(mp);
|
||||
mp->mnt_flag &= ~MNT_RDONLY;
|
||||
MNT_IUNLOCK(mp);
|
||||
goto out;
|
||||
}
|
||||
if (!tmpfs_check_rw_maps(mp))
|
||||
break;
|
||||
}
|
||||
out:
|
||||
vfs_write_resume(mp, 0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
tmpfs_mount(struct mount *mp)
|
||||
{
|
||||
@ -144,7 +370,7 @@ tmpfs_mount(struct mount *mp)
|
||||
sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
|
||||
struct tmpfs_mount *tmp;
|
||||
struct tmpfs_node *root;
|
||||
int error, flags;
|
||||
int error;
|
||||
bool nonc;
|
||||
/* Size counters. */
|
||||
u_quad_t pages;
|
||||
@ -178,19 +404,7 @@ tmpfs_mount(struct mount *mp)
|
||||
if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
|
||||
!(VFS_TO_TMPFS(mp)->tm_ronly)) {
|
||||
/* RW -> RO */
|
||||
error = VFS_SYNC(mp, MNT_WAIT);
|
||||
if (error)
|
||||
return (error);
|
||||
flags = WRITECLOSE;
|
||||
if (mp->mnt_flag & MNT_FORCE)
|
||||
flags |= FORCECLOSE;
|
||||
error = vflush(mp, 0, flags, curthread);
|
||||
if (error)
|
||||
return (error);
|
||||
VFS_TO_TMPFS(mp)->tm_ronly = 1;
|
||||
MNT_ILOCK(mp);
|
||||
mp->mnt_flag |= MNT_RDONLY;
|
||||
MNT_IUNLOCK(mp);
|
||||
return (tmpfs_rw_to_ro(mp));
|
||||
} else if (!vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) &&
|
||||
VFS_TO_TMPFS(mp)->tm_ronly) {
|
||||
/* RO -> RW */
|
||||
@ -469,45 +683,13 @@ tmpfs_statfs(struct mount *mp, struct statfs *sbp)
|
||||
static int
|
||||
tmpfs_sync(struct mount *mp, int waitfor)
|
||||
{
|
||||
struct vnode *vp, *mvp;
|
||||
struct vm_object *obj;
|
||||
|
||||
if (waitfor == MNT_SUSPEND) {
|
||||
MNT_ILOCK(mp);
|
||||
mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
|
||||
MNT_IUNLOCK(mp);
|
||||
} else if (waitfor == MNT_LAZY) {
|
||||
/*
|
||||
* Handle lazy updates of mtime from writes to mmaped
|
||||
* regions. Use MNT_VNODE_FOREACH_ALL instead of
|
||||
* MNT_VNODE_FOREACH_ACTIVE, since unmap of the
|
||||
* tmpfs-backed vnode does not call vinactive(), due
|
||||
* to vm object type is OBJT_SWAP.
|
||||
*/
|
||||
MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
|
||||
if (vp->v_type != VREG) {
|
||||
VI_UNLOCK(vp);
|
||||
continue;
|
||||
}
|
||||
obj = vp->v_object;
|
||||
KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
|
||||
(OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));
|
||||
|
||||
/*
|
||||
* Unlocked read, avoid taking vnode lock if
|
||||
* not needed. Lost update will be handled on
|
||||
* the next call.
|
||||
*/
|
||||
if ((obj->flags & OBJ_TMPFS_DIRTY) == 0) {
|
||||
VI_UNLOCK(vp);
|
||||
continue;
|
||||
}
|
||||
if (vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK,
|
||||
curthread) != 0)
|
||||
continue;
|
||||
tmpfs_check_mtime(vp);
|
||||
vput(vp);
|
||||
}
|
||||
tmpfs_update_mtime(mp, true);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user