Account the writeable shared mappings backed by files in the vnode
v_writecount. Keep the amount of the virtual address space used by the mappings in the new vm_object un_pager.vnp.writemappings counter. The vnode v_writecount is incremented when writemappings becomes non-zero, and decremented when writemappings returns to zero. Writeable shared vnode-backed mappings are accounted for in vm_mmap(), and vm_map_insert() is instructed to set the MAP_ENTRY_VN_WRITECNT flag on the created map entry. During deferred map entry deallocation, vm_map_process_deferred() checks for MAP_ENTRY_VN_WRITECNT and decrements writemappings for the vm object. Now, a writeable mount cannot be demoted to read-only while writeable shared mappings of vnodes from the mount point exist. Also, execve(2) fails for such files with ETXTBSY, as it should. Noted by: tegge Reviewed by: tegge (long time ago, early version), alc Tested by: pho MFC after: 3 weeks
This commit is contained in:
parent
47190ea664
commit
84110e7e0b
@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_pager.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vnode_pager.h>
|
||||
#include <vm/swap_pager.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
@ -475,11 +476,23 @@ vm_map_process_deferred(void)
|
||||
{
|
||||
struct thread *td;
|
||||
vm_map_entry_t entry;
|
||||
vm_object_t object;
|
||||
|
||||
td = curthread;
|
||||
|
||||
while ((entry = td->td_map_def_user) != NULL) {
|
||||
td->td_map_def_user = entry->next;
|
||||
if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
|
||||
/*
|
||||
* Decrement the object's writemappings and
|
||||
* possibly the vnode's v_writecount.
|
||||
*/
|
||||
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
|
||||
("Submap with writecount"));
|
||||
object = entry->object.vm_object;
|
||||
KASSERT(object != NULL, ("No object for writecount"));
|
||||
vnode_pager_release_writecount(object, entry->start,
|
||||
entry->end);
|
||||
}
|
||||
vm_map_entry_deallocate(entry, FALSE);
|
||||
}
|
||||
}
|
||||
@ -1174,6 +1187,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
|
||||
protoeflags |= MAP_ENTRY_NOSYNC;
|
||||
if (cow & MAP_DISABLE_COREDUMP)
|
||||
protoeflags |= MAP_ENTRY_NOCOREDUMP;
|
||||
if (cow & MAP_VN_WRITECOUNT)
|
||||
protoeflags |= MAP_ENTRY_VN_WRITECNT;
|
||||
if (cow & MAP_INHERIT_SHARE)
|
||||
inheritance = VM_INHERIT_SHARE;
|
||||
else
|
||||
@ -1516,6 +1531,11 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
|
||||
* references. Thus, the map lock can be kept
|
||||
* without causing a lock-order reversal with
|
||||
* the vnode lock.
|
||||
*
|
||||
* Since we count the number of virtual page
|
||||
* mappings in object->un_pager.vnp.writemappings,
|
||||
* the writemappings value should not be adjusted
|
||||
* when the entry is disposed of.
|
||||
*/
|
||||
if (prev->object.vm_object)
|
||||
vm_object_deallocate(prev->object.vm_object);
|
||||
@ -1627,6 +1647,13 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
|
||||
|
||||
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
|
||||
vm_object_reference(new_entry->object.vm_object);
|
||||
/*
|
||||
* The object->un_pager.vnp.writemappings for the
|
||||
* object of MAP_ENTRY_VN_WRITECNT type entry shall be
|
||||
* kept as is here. The virtual pages are
|
||||
* re-distributed among the clipped entries, so the sum is
|
||||
* left the same.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@ -2900,6 +2927,7 @@ vm_map_copy_entry(
|
||||
vm_ooffset_t *fork_charge)
|
||||
{
|
||||
vm_object_t src_object;
|
||||
vm_map_entry_t fake_entry;
|
||||
vm_offset_t size;
|
||||
struct ucred *cred;
|
||||
int charged;
|
||||
@ -2965,6 +2993,27 @@ vm_map_copy_entry(
|
||||
src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
|
||||
dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
|
||||
dst_entry->offset = src_entry->offset;
|
||||
if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
|
||||
/*
|
||||
* MAP_ENTRY_VN_WRITECNT cannot
|
||||
* indicate write reference from
|
||||
* src_entry, since the entry is
|
||||
* marked as needs copy. Allocate a
|
||||
* fake entry that is used to
|
||||
* decrement object->un_pager.vnp.writemappings
|
||||
* at the appropriate time. Attach
|
||||
* fake_entry to the deferred list.
|
||||
*/
|
||||
fake_entry = vm_map_entry_create(dst_map);
|
||||
fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
|
||||
src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
|
||||
vm_object_reference(src_object);
|
||||
fake_entry->object.vm_object = src_object;
|
||||
fake_entry->start = src_entry->start;
|
||||
fake_entry->end = src_entry->end;
|
||||
fake_entry->next = curthread->td_map_def_user;
|
||||
curthread->td_map_def_user = fake_entry;
|
||||
}
|
||||
} else {
|
||||
dst_entry->object.vm_object = NULL;
|
||||
dst_entry->offset = 0;
|
||||
@ -3043,6 +3092,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
vm_map_lock(old_map);
|
||||
if (old_map->busy)
|
||||
vm_map_wait_busy(old_map);
|
||||
new_map = NULL; /* silence gcc */
|
||||
vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
|
||||
if (vm2 == NULL)
|
||||
goto unlock_and_return;
|
||||
@ -3122,6 +3172,16 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
|
||||
MAP_ENTRY_IN_TRANSITION);
|
||||
new_entry->wired_count = 0;
|
||||
if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
|
||||
object = new_entry->object.vm_object;
|
||||
KASSERT(((struct vnode *)object->handle)->
|
||||
v_writecount > 0,
|
||||
("vmspace_fork: v_writecount"));
|
||||
KASSERT(object->un_pager.vnp.writemappings > 0,
|
||||
("vmspace_fork: vnp.writecount"));
|
||||
vnode_pager_update_writecount(object,
|
||||
new_entry->start, new_entry->end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the entry into the new map -- we know we're
|
||||
@ -3146,8 +3206,11 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
*/
|
||||
new_entry = vm_map_entry_create(new_map);
|
||||
*new_entry = *old_entry;
|
||||
/*
|
||||
* Copied entry is COW over the old object.
|
||||
*/
|
||||
new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
|
||||
MAP_ENTRY_IN_TRANSITION);
|
||||
MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
|
||||
new_entry->wired_count = 0;
|
||||
new_entry->object.vm_object = NULL;
|
||||
new_entry->cred = NULL;
|
||||
@ -3161,9 +3224,15 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
old_entry = old_entry->next;
|
||||
}
|
||||
unlock_and_return:
|
||||
vm_map_unlock(old_map);
|
||||
/*
|
||||
* Use inlined vm_map_unlock() to postpone handling the deferred
|
||||
* map entries, which cannot be done until both old_map and
|
||||
* new_map locks are released.
|
||||
*/
|
||||
sx_xunlock(&old_map->lock);
|
||||
if (vm2 != NULL)
|
||||
vm_map_unlock(new_map);
|
||||
sx_xunlock(&new_map->lock);
|
||||
vm_map_process_deferred();
|
||||
|
||||
return (vm2);
|
||||
}
|
||||
|
@ -139,6 +139,7 @@ struct vm_map_entry {
|
||||
#define MAP_ENTRY_GROWS_UP 0x2000 /* Bottom-up stacks */
|
||||
|
||||
#define MAP_ENTRY_WIRE_SKIPPED 0x4000
|
||||
#define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */
|
||||
|
||||
#ifdef _KERNEL
|
||||
static __inline u_char
|
||||
@ -315,6 +316,7 @@ long vmspace_wired_count(struct vmspace *vmspace);
|
||||
#define MAP_DISABLE_SYNCER 0x0020
|
||||
#define MAP_DISABLE_COREDUMP 0x0100
|
||||
#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */
|
||||
#define MAP_VN_WRITECOUNT 0x0400
|
||||
#define MAP_STACK_GROWS_DOWN 0x1000
|
||||
#define MAP_STACK_GROWS_UP 0x2000
|
||||
#define MAP_ACC_CHARGED 0x4000
|
||||
|
@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_pageout.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vnode_pager.h>
|
||||
|
||||
#ifdef HWPMC_HOOKS
|
||||
#include <sys/pmckern.h>
|
||||
@ -93,7 +94,7 @@ struct sbrk_args {
|
||||
#endif
|
||||
|
||||
static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
|
||||
int *, struct vnode *, vm_ooffset_t *, vm_object_t *);
|
||||
int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
|
||||
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
|
||||
int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
|
||||
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
|
||||
@ -1218,28 +1219,33 @@ sys_munlock(td, uap)
|
||||
/*
|
||||
* vm_mmap_vnode()
|
||||
*
|
||||
* MPSAFE
|
||||
*
|
||||
* Helper function for vm_mmap. Perform sanity check specific for mmap
|
||||
* operations on vnodes.
|
||||
*
|
||||
* For VCHR vnodes, the vnode lock is held over the call to
|
||||
* vm_mmap_cdev() to keep vp->v_rdev valid.
|
||||
*/
|
||||
int
|
||||
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
|
||||
vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
|
||||
struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp)
|
||||
struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
|
||||
boolean_t *writecounted)
|
||||
{
|
||||
struct vattr va;
|
||||
vm_object_t obj;
|
||||
vm_offset_t foff;
|
||||
struct mount *mp;
|
||||
struct ucred *cred;
|
||||
int error, flags;
|
||||
int vfslocked;
|
||||
int error, flags, locktype, vfslocked;
|
||||
|
||||
mp = vp->v_mount;
|
||||
cred = td->td_ucred;
|
||||
if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
|
||||
locktype = LK_EXCLUSIVE;
|
||||
else
|
||||
locktype = LK_SHARED;
|
||||
vfslocked = VFS_LOCK_GIANT(mp);
|
||||
if ((error = vget(vp, LK_SHARED, td)) != 0) {
|
||||
if ((error = vget(vp, locktype, td)) != 0) {
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
return (error);
|
||||
}
|
||||
@ -1256,8 +1262,20 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
|
||||
}
|
||||
if (obj->handle != vp) {
|
||||
vput(vp);
|
||||
vp = (struct vnode*)obj->handle;
|
||||
vget(vp, LK_SHARED, td);
|
||||
vp = (struct vnode *)obj->handle;
|
||||
/*
|
||||
* Bypass filesystems obey the mpsafety of the
|
||||
* underlying fs.
|
||||
*/
|
||||
error = vget(vp, locktype, td);
|
||||
if (error != 0) {
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
return (error);
|
||||
}
|
||||
if (locktype == LK_EXCLUSIVE) {
|
||||
*writecounted = TRUE;
|
||||
vnode_pager_update_writecount(obj, 0, objsize);
|
||||
}
|
||||
}
|
||||
} else if (vp->v_type == VCHR) {
|
||||
error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
|
||||
@ -1293,7 +1311,7 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
|
||||
objsize = round_page(va.va_size);
|
||||
if (va.va_nlink == 0)
|
||||
flags |= MAP_NOSYNC;
|
||||
obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, td->td_ucred);
|
||||
obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff, cred);
|
||||
if (obj == NULL) {
|
||||
error = ENOMEM;
|
||||
goto done;
|
||||
@ -1432,6 +1450,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
|
||||
int rv = KERN_SUCCESS;
|
||||
int docow, error;
|
||||
struct thread *td = curthread;
|
||||
boolean_t writecounted;
|
||||
|
||||
if (size == 0)
|
||||
return (0);
|
||||
@ -1470,6 +1489,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
|
||||
return (EINVAL);
|
||||
fitit = FALSE;
|
||||
}
|
||||
writecounted = FALSE;
|
||||
|
||||
/*
|
||||
* Lookup/allocate object.
|
||||
*/
|
||||
@ -1480,7 +1501,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
|
||||
break;
|
||||
case OBJT_VNODE:
|
||||
error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
|
||||
handle, &foff, &object);
|
||||
handle, &foff, &object, &writecounted);
|
||||
break;
|
||||
case OBJT_SWAP:
|
||||
error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
|
||||
@ -1520,6 +1541,8 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
|
||||
/* Shared memory is also shared with children. */
|
||||
if (flags & MAP_SHARED)
|
||||
docow |= MAP_INHERIT_SHARE;
|
||||
if (writecounted)
|
||||
docow |= MAP_VN_WRITECOUNT;
|
||||
|
||||
if (flags & MAP_STACK)
|
||||
rv = vm_map_stack(map, *addr, size, prot, maxprot,
|
||||
@ -1537,7 +1560,12 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
|
||||
* Lose the object reference. Will destroy the
|
||||
* object if it's an unnamed anonymous mapping
|
||||
* or named anonymous without other references.
|
||||
*
|
||||
* If this mapping was accounted for in the vnode's
|
||||
* writecount, then undo that now.
|
||||
*/
|
||||
if (writecounted)
|
||||
vnode_pager_release_writecount(object, 0, size);
|
||||
vm_object_deallocate(object);
|
||||
}
|
||||
|
||||
|
@ -112,6 +112,7 @@ struct vm_object {
|
||||
*/
|
||||
struct {
|
||||
off_t vnp_size;
|
||||
vm_ooffset_t writemappings;
|
||||
} vnp;
|
||||
|
||||
/*
|
||||
|
@ -222,6 +222,7 @@ retry:
|
||||
object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
|
||||
|
||||
object->un_pager.vnp.vnp_size = size;
|
||||
object->un_pager.vnp.writemappings = 0;
|
||||
|
||||
object->handle = handle;
|
||||
VI_LOCK(vp);
|
||||
@ -268,10 +269,16 @@ vnode_pager_dealloc(object)
|
||||
wakeup(object);
|
||||
}
|
||||
ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
|
||||
if (object->un_pager.vnp.writemappings > 0) {
|
||||
object->un_pager.vnp.writemappings = 0;
|
||||
vp->v_writecount--;
|
||||
}
|
||||
vp->v_object = NULL;
|
||||
vp->v_vflag &= ~VV_TEXT;
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
while (refs-- > 0)
|
||||
vunref(vp);
|
||||
VM_OBJECT_LOCK(object);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
@ -1215,3 +1222,81 @@ vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written)
|
||||
}
|
||||
VM_OBJECT_UNLOCK(obj);
|
||||
}
|
||||
|
||||
/*
 * Adjust the object's un_pager.vnp.writemappings counter by (end - start)
 * and keep the backing vnode's v_writecount in step with it.
 *
 * object:     VM object to account against.  Nothing is changed unless
 *             its type is OBJT_VNODE.
 * start, end: bounds of the range being accounted; end - start is added
 *             to writemappings.  vnode_pager_release_writecount() passes
 *             the two swapped in order to subtract.
 *
 * v_writecount is incremented when writemappings goes from zero to
 * non-zero and decremented when it goes from non-zero back to zero.
 * ASSERT_VOP_ELOCKED() is applied to the vnode before either change.
 * The object lock is taken and released inside this function.
 */
void
|
||||
vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
|
||||
vm_offset_t end)
|
||||
{
|
||||
struct vnode *vp;
|
||||
vm_ooffset_t old_wm;
|
||||
|
||||
VM_OBJECT_LOCK(object);
|
||||
/* Objects that are not of type OBJT_VNODE are left untouched. */
if (object->type != OBJT_VNODE) {
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
return;
|
||||
}
|
||||
/* Remember the previous value so that zero crossings can be detected. */
old_wm = object->un_pager.vnp.writemappings;
|
||||
object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
|
||||
vp = object->handle;
|
||||
/* Counter left zero: bump v_writecount.  Counter reached zero: drop it. */
if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
|
||||
ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
|
||||
vp->v_writecount++;
|
||||
} else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
|
||||
ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
|
||||
vp->v_writecount--;
|
||||
}
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
}
|
||||
|
||||
/*
 * Subtract (end - start) from the object's un_pager.vnp.writemappings.
 *
 * object:     VM object whose counter is reduced.  Nothing is done unless
 *             its type is OBJT_VNODE.
 * start, end: bounds of the range whose accounting is being released.
 *
 * When the subtraction would not bring the counter to zero it is done
 * directly under the object lock.  Otherwise the vnode is held, the
 * object lock is dropped, the vnode is locked exclusively, and the
 * update is delegated to vnode_pager_update_writecount(), which
 * decrements v_writecount when the counter reaches zero.
 */
void
|
||||
vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
|
||||
vm_offset_t end)
|
||||
{
|
||||
struct vnode *vp;
|
||||
struct mount *mp;
|
||||
vm_offset_t inc;
|
||||
int vfslocked;
|
||||
|
||||
VM_OBJECT_LOCK(object);
|
||||
|
||||
/*
|
||||
* First, recheck the object type to account for the race when
|
||||
* the vnode is reclaimed.
|
||||
*/
|
||||
if (object->type != OBJT_VNODE) {
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Optimize for the case when writemappings is not going to
|
||||
* zero.
|
||||
*/
|
||||
inc = end - start;
|
||||
if (object->un_pager.vnp.writemappings != inc) {
|
||||
object->un_pager.vnp.writemappings -= inc;
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Slow path: the counter is about to reach zero.  The vnode is held
 * before the object lock is dropped -- presumably so that vp stays
 * valid until the matching vdrop() below; confirm against vhold(9).
 */
vp = object->handle;
|
||||
vhold(vp);
|
||||
VM_OBJECT_UNLOCK(object);
|
||||
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
|
||||
mp = NULL;
|
||||
/* NOTE(review): the return value of vn_start_write() is not checked. */
vn_start_write(vp, &mp, V_WAIT);
|
||||
/*
 * vnode_pager_update_writecount() asserts the exclusive vnode lock
 * whenever it changes v_writecount.
 */
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
|
||||
/*
|
||||
* Decrement the object's writemappings, by swapping the start
|
||||
* and end arguments for vnode_pager_update_writecount(). If
|
||||
* there was not a race with vnode reclamation, then the
|
||||
* vnode's v_writecount is decremented.
|
||||
*/
|
||||
vnode_pager_update_writecount(object, end, start);
|
||||
/* Release everything that was taken for the slow path. */
VOP_UNLOCK(vp, 0);
|
||||
vdrop(vp);
|
||||
/* mp stays NULL unless vn_start_write() stored a mount point in it. */
if (mp != NULL)
|
||||
vn_finished_write(mp);
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
}
|
||||
|
@ -46,7 +46,11 @@ int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m,
|
||||
int count, boolean_t sync,
|
||||
int *rtvals);
|
||||
|
||||
void vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
|
||||
vm_offset_t end);
|
||||
void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written);
|
||||
void vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
|
||||
vm_offset_t end);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* _VNODE_PAGER_ */
|
||||
|
Loading…
x
Reference in New Issue
Block a user