Switch to use shared vnode locks for text files during image activation.
kern_execve() locks the text vnode exclusive in order to set and clear the VV_TEXT flag. VV_TEXT is mutually exclusive with the v_writecount > 0 condition.

The change removes VV_TEXT, replacing it with the condition v_writecount <= -1, and puts v_writecount under the vnode interlock. Each text reference decrements v_writecount. To clear the text reference when the segment is unmapped, it is recorded in the vm_map_entry backed by the text file as the MAP_ENTRY_VN_EXEC flag, and v_writecount is incremented on map entry removal.

Operations like VOP_ADD_WRITECOUNT() and VOP_SET_TEXT() check that v_writecount does not contradict the requested change. vn_writechk() is now racy and its use was eliminated everywhere except in the access checks; the atomic check for writability and the increment of v_writecount are performed by the VOP. vn_truncate() now increments v_writecount around the VOP_SETATTR() call, the lack of which is arguably a bug on its own.

nullfs always bypasses v_writecount to the lower vnode, so the nullfs vnode keeps its own v_writecount correct, and the lower vnode receives all references, since object->handle is always the lower vnode.

On deallocation of the text vnode's VM object, the v_writecount value is reset to zero, and the deadfs vop_unset_text short-circuits the operation. Reclamation of lowervp always reclaims all nullfs vnodes referencing lowervp first, so no stray references are left.

Reviewed by:	markj, trasz
Tested by:	mjg, pho
Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
Differential revision:	https://reviews.freebsd.org/D19923
commit 2dc0d9edaa (parent ce1a272ee3)
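Before the diff, a minimal sketch (not the committed code) of the v_writecount convention described above: a positive count tracks writers, a negative count tracks text (executable) references, and the two states exclude each other under the vnode interlock. The helper names text_ref_acquire() and writer_ref_acquire() are illustrative only; the committed logic lives in vop_stdset_text(), vop_stdunset_text(), and vop_stdadd_writecount() in the diff below.

/*
 * Illustrative sketch only: how the reworked v_writecount is intended
 * to behave.  Positive values count writers, negative values count
 * text references; both transitions happen under the vnode interlock.
 */
static int
text_ref_acquire(struct vnode *vp)	/* analogous to VOP_SET_TEXT() */
{
	int error;

	VI_LOCK(vp);
	if (vp->v_writecount > 0) {
		error = ETXTBSY;	/* already opened for write */
	} else {
		vp->v_writecount--;	/* one more text reference */
		error = 0;
	}
	VI_UNLOCK(vp);
	return (error);
}

static int
writer_ref_acquire(struct vnode *vp)	/* analogous to VOP_ADD_WRITECOUNT(vp, 1) */
{
	int error;

	VI_LOCK(vp);
	if (vp->v_writecount < 0) {
		error = ETXTBSY;	/* vnode is in use as text */
	} else {
		vp->v_writecount++;	/* one more writer */
		error = 0;
	}
	VI_UNLOCK(vp);
	return (error);
}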
@@ -458,7 +458,7 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp)
* others x bit is enabled.
* mmap(2) does not allow to mmap with PROT_EXEC if
* binary' file comes from noexec mount. We cannot
* set VV_TEXT on the binary.
* set a text reference on the binary.
*/
dir_enable = false;
if (st.st_uid == geteuid()) {
@@ -258,13 +258,16 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
struct nameidata ni;
struct vnode *vp;
struct exec *a_out;
vm_map_t map;
vm_map_entry_t entry;
struct vattr attr;
vm_offset_t vmaddr;
unsigned long file_offset;
unsigned long bss_size;
char *library;
ssize_t aresid;
int error, locked, writecount;
int error;
bool locked, opened, textset;

LCONVPATHEXIST(td, args->library, &library);

@@ -274,8 +277,10 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
#endif

a_out = NULL;
locked = 0;
vp = NULL;
locked = false;
textset = false;
opened = false;

NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
UIO_SYSSPACE, library, td);

@@ -291,16 +296,7 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
* From here on down, we have a locked vnode that must be unlocked.
* XXX: The code below largely duplicates exec_check_permissions().
*/
locked = 1;

/* Writable? */
error = VOP_GET_WRITECOUNT(vp, &writecount);
if (error != 0)
goto cleanup;
if (writecount != 0) {
error = ETXTBSY;
goto cleanup;
}
locked = true;

/* Executable? */
error = VOP_GETATTR(vp, &attr, td->td_ucred);
@@ -339,6 +335,7 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
if (error)
goto cleanup;
opened = true;

/* Pull in executable header into exec_map */
error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,

@@ -401,15 +398,16 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)

/*
* Prevent more writers.
* XXX: Note that if any of the VM operations fail below we don't
* clear this flag.
*/
VOP_SET_TEXT(vp);
error = VOP_SET_TEXT(vp);
if (error != 0)
goto cleanup;
textset = true;

/*
* Lock no longer needed
*/
locked = 0;
locked = false;
VOP_UNLOCK(vp, 0);

/*

@@ -456,11 +454,21 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
* Map it all into the process's space as a single
* copy-on-write "data" segment.
*/
error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
map = &td->td_proc->p_vmspace->vm_map;
error = vm_mmap(map, &vmaddr,
a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
if (error)
goto cleanup;
vm_map_lock(map);
if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
vm_map_unlock(map);
error = EDOOFUS;
goto cleanup;
}
entry->eflags |= MAP_ENTRY_VN_EXEC;
vm_map_unlock(map);
textset = false;
}
#ifdef DEBUG
printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],

@@ -480,7 +488,14 @@ linux_uselib(struct thread *td, struct linux_uselib_args *args)
}

cleanup:
/* Unlock vnode if needed */
if (opened) {
if (locked)
VOP_UNLOCK(vp, 0);
locked = false;
VOP_CLOSE(vp, FREAD, td->td_ucred, td);
}
if (textset)
VOP_UNSET_TEXT_CHECKED(vp);
if (locked)
VOP_UNLOCK(vp, 0);
@@ -47,6 +47,7 @@ static vop_lookup_t dead_lookup;
static vop_open_t dead_open;
static vop_getwritemount_t dead_getwritemount;
static vop_rename_t dead_rename;
static vop_unset_text_t dead_unset_text;

struct vop_vector dead_vnodeops = {
.vop_default = &default_vnodeops,

@@ -76,6 +77,7 @@ struct vop_vector dead_vnodeops = {
.vop_setattr = VOP_EBADF,
.vop_symlink = VOP_PANIC,
.vop_vptocnp = VOP_EBADF,
.vop_unset_text = dead_unset_text,
.vop_write = dead_write,
};

@@ -148,3 +150,10 @@ dead_rename(struct vop_rename_args *ap)
vop_rename_fail(ap);
return (EXDEV);
}

static int
dead_unset_text(struct vop_unset_text_args *ap)
{

return (0);
}
@@ -1639,7 +1639,7 @@ ncl_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td,
}
}
/* ASSERT_VOP_LOCKED(vp, "ncl_doio"); */
if (p && (vp->v_vflag & VV_TEXT)) {
if (p && vp->v_writecount <= -1) {
mtx_lock(&np->n_mtx);
if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &np->n_vattr.na_mtime)) {
mtx_unlock(&np->n_mtx);

@@ -3442,8 +3442,7 @@ nfs_set_text(struct vop_set_text_args *ap)
np->n_mtime = np->n_vattr.na_mtime;
mtx_unlock(&np->n_mtx);

vp->v_vflag |= VV_TEXT;
return (0);
return (vop_stdset_text(ap));
}

/*
@@ -339,15 +339,15 @@ null_add_writecount(struct vop_add_writecount_args *ap)

vp = ap->a_vp;
lvp = NULLVPTOLOWERVP(vp);
KASSERT(vp->v_writecount + ap->a_inc >= 0, ("wrong writecount inc"));
if (vp->v_writecount > 0 && vp->v_writecount + ap->a_inc == 0)
error = VOP_ADD_WRITECOUNT(lvp, -1);
else if (vp->v_writecount == 0 && vp->v_writecount + ap->a_inc > 0)
error = VOP_ADD_WRITECOUNT(lvp, 1);
else
error = 0;
VI_LOCK(vp);
/* text refs are bypassed to lowervp */
VNASSERT(vp->v_writecount >= 0, vp, ("wrong null writecount"));
VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
("wrong writecount inc %d", ap->a_inc));
error = VOP_ADD_WRITECOUNT(lvp, ap->a_inc);
if (error == 0)
vp->v_writecount += ap->a_inc;
VI_UNLOCK(vp);
return (error);
}

@@ -802,15 +802,17 @@ null_reclaim(struct vop_reclaim_args *ap)
vp->v_data = NULL;
vp->v_object = NULL;
vp->v_vnlock = &vp->v_lock;
VI_UNLOCK(vp);

/*
* If we were opened for write, we leased one write reference
* If we were opened for write, we leased the write reference
* to the lower vnode. If this is a reclamation due to the
* forced unmount, undo the reference now.
*/
if (vp->v_writecount > 0)
VOP_ADD_WRITECOUNT(lowervp, -1);
VOP_ADD_WRITECOUNT(lowervp, -vp->v_writecount);

VI_UNLOCK(vp);

if ((xp->null_flags & NULLV_NOUNLOCK) != 0)
vunref(lowervp);
else
@@ -941,10 +941,14 @@ unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
vput(vp);
goto unionfs_vn_create_on_upper_free_out1;
}
VOP_ADD_WRITECOUNT(vp, 1);
error = VOP_ADD_WRITECOUNT(vp, 1);
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp,
vp->v_writecount);
*vpp = vp;
if (error == 0) {
*vpp = vp;
} else {
VOP_CLOSE(vp, fmode, cred, td);
}

unionfs_vn_create_on_upper_free_out1:
VOP_UNLOCK(udvp, LK_RELEASE);

@@ -1078,7 +1082,7 @@ unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
}
}
VOP_CLOSE(uvp, FWRITE, cred, td);
VOP_ADD_WRITECOUNT(uvp, -1);
VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
uvp->v_writecount);
@@ -247,8 +247,8 @@ exec_aout_imgact(struct image_params *imgp)
/* data + bss can't exceed rlimit */
a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) ||
racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) {
PROC_UNLOCK(imgp->proc);
return (ENOMEM);
PROC_UNLOCK(imgp->proc);
return (ENOMEM);
}
PROC_UNLOCK(imgp->proc);

@@ -267,7 +267,7 @@ exec_aout_imgact(struct image_params *imgp)
*/
error = exec_new_vmspace(imgp, &aout_sysvec);

vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error)
return (error);

@@ -286,12 +286,13 @@ exec_aout_imgact(struct image_params *imgp)
file_offset,
virtual_offset, text_end,
VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT);
MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
VOP_SET_TEXT_CHECKED(imgp->vp);
data_end = text_end + a_out->a_data;
if (a_out->a_data) {
vm_object_reference(object);

@@ -299,12 +300,13 @@ exec_aout_imgact(struct image_params *imgp)
file_offset + a_out->a_text,
text_end, data_end,
VM_PROT_ALL, VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT);
MAP_COPY_ON_WRITE | MAP_PREFAULT | MAP_VN_EXEC);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
VOP_SET_TEXT_CHECKED(imgp->vp);
}

if (bss_size) {
@@ -526,13 +526,17 @@ __elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object,
} else {
vm_object_reference(object);
rv = vm_map_fixed(map, object, offset, start, end - start,
prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL);
prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL |
(object != NULL ? MAP_VN_EXEC : 0));
if (rv != KERN_SUCCESS) {
locked = VOP_ISLOCKED(imgp->vp);
VOP_UNLOCK(imgp->vp, 0);
vm_object_deallocate(object);
vn_lock(imgp->vp, locked | LK_RETRY);
return (rv);
} else if (object != NULL) {
MPASS(imgp->vp->v_object == object);
VOP_SET_TEXT_CHECKED(imgp->vp);
}
}
return (KERN_SUCCESS);

@@ -589,13 +593,8 @@ __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
(prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

rv = __elfN(map_insert)(imgp, map,
object,
file_addr, /* file offset */
map_addr, /* virtual start */
map_addr + map_len,/* virtual end */
prot,
cow);
rv = __elfN(map_insert)(imgp, map, object, file_addr,
map_addr, map_addr + map_len, prot, cow);
if (rv != KERN_SUCCESS)
return (EINVAL);
@@ -716,7 +715,7 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
struct nameidata *nd;
struct vattr *attr;
struct image_params *imgp;
u_long flags, rbase;
u_long rbase;
u_long base_addr = 0;
int error;

@@ -744,10 +743,8 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
imgp->object = NULL;
imgp->execlabel = NULL;

flags = FOLLOW | LOCKSHARED | LOCKLEAF;

again:
NDINIT(nd, LOOKUP, flags, UIO_SYSSPACE, file, curthread);
NDINIT(nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE, file,
curthread);
if ((error = namei(nd)) != 0) {
nd->ni_vp = NULL;
goto fail;

@@ -762,27 +759,6 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
if (error)
goto fail;

/*
* Also make certain that the interpreter stays the same,
* so set its VV_TEXT flag, too. Since this function is only
* used to load the interpreter, the VV_TEXT is almost always
* already set.
*/
if (VOP_IS_TEXT(nd->ni_vp) == 0) {
if (VOP_ISLOCKED(nd->ni_vp) != LK_EXCLUSIVE) {
/*
* LK_UPGRADE could have resulted in dropping
* the lock. Just try again from the start,
* this time with exclusive vnode lock.
*/
vput(nd->ni_vp);
flags &= ~LOCKSHARED;
goto again;
}

VOP_SET_TEXT(nd->ni_vp);
}

error = exec_map_first_page(imgp);
if (error)
goto fail;

@@ -825,9 +801,11 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
if (imgp->firstpage)
exec_unmap_first_page(imgp);

if (nd->ni_vp)
if (nd->ni_vp) {
if (imgp->textset)
VOP_UNSET_TEXT_CHECKED(nd->ni_vp);
vput(nd->ni_vp);
}
free(tempdata, M_TEMP);

return (error);

@@ -961,7 +939,7 @@ __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr,
if (interp == NULL) {
VOP_UNLOCK(imgp->vp, 0);
interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK);
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
error = vn_rdwr(UIO_READ, imgp->vp, interp,
interp_name_len, phdr->p_offset,

@@ -1228,7 +1206,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
maxv / 2, 1UL << flsl(maxalign));
}

vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error != 0)
goto ret;

@@ -1272,7 +1250,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
}
error = __elfN(load_interp)(imgp, brand_info, interp, &addr,
&imgp->entry_addr);
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
if (error != 0)
goto ret;
} else

@@ -1285,7 +1263,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (elf_auxargs == NULL) {
VOP_UNLOCK(imgp->vp, 0);
elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
elf_auxargs->execfd = -1;
elf_auxargs->phdr = proghdr + et_dyn_addr;

@@ -2570,7 +2548,7 @@ __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
if (buf == NULL) {
VOP_UNLOCK(imgp->vp, 0);
buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK);
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
}
error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
@@ -375,7 +375,6 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
#endif
struct vnode *oldtextvp = NULL, *newtextvp;
int credential_changing;
int textset;
#ifdef MAC
struct label *interpvplabel = NULL;
int will_transition;

@@ -423,8 +422,8 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
* interpreter if this is an interpreted binary.
*/
if (args->fname != NULL) {
NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
| AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
}

SDT_PROBE1(proc, , , exec, args->fname);

@@ -457,13 +456,14 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp);
if (error)
goto exec_fail;
vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(newtextvp, LK_SHARED | LK_RETRY);
AUDIT_ARG_VNODE1(newtextvp);
imgp->vp = newtextvp;
}

/*
* Check file permissions (also 'opens' file)
* Check file permissions. Also 'opens' file and sets its vnode to
* text mode.
*/
error = exec_check_permissions(imgp);
if (error)

@@ -473,16 +473,6 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
if (imgp->object != NULL)
vm_object_reference(imgp->object);

/*
* Set VV_TEXT now so no one can write to the executable while we're
* activating it.
*
* Remember if this was set before and unset it in case this is not
* actually an executable image.
*/
textset = VOP_IS_TEXT(imgp->vp);
VOP_SET_TEXT(imgp->vp);

error = exec_map_first_page(imgp);
if (error)
goto exec_fail_dealloc;

@@ -610,11 +600,8 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
}

if (error) {
if (error == -1) {
if (textset == 0)
VOP_UNSET_TEXT(imgp->vp);
if (error == -1)
error = ENOEXEC;
}
goto exec_fail_dealloc;
}
@@ -625,12 +612,13 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
if (imgp->interpreted) {
exec_unmap_first_page(imgp);
/*
* VV_TEXT needs to be unset for scripts. There is a short
* period before we determine that something is a script where
* VV_TEXT will be set. The vnode lock is held over this
* entire period so nothing should illegitimately be blocked.
* The text reference needs to be removed for scripts.
* There is a short period before we determine that
* something is a script where text reference is active.
* The vnode lock is held over this entire period
* so nothing should illegitimately be blocked.
*/
VOP_UNSET_TEXT(imgp->vp);
VOP_UNSET_TEXT_CHECKED(imgp->vp);
/* free name buffer and old vnode */
if (args->fname != NULL)
NDFREE(&nd, NDF_ONLY_PNBUF);

@@ -886,6 +874,8 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
NDFREE(&nd, NDF_ONLY_PNBUF);
if (imgp->opened)
VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
if (imgp->textset)
VOP_UNSET_TEXT_CHECKED(imgp->vp);
if (error != 0)
vput(imgp->vp);
else

@@ -1706,7 +1696,7 @@ exec_check_permissions(struct image_params *imgp)
struct vnode *vp = imgp->vp;
struct vattr *attr = imgp->attr;
struct thread *td;
int error, writecount;
int error;

td = curthread;

@@ -1750,12 +1740,17 @@ exec_check_permissions(struct image_params *imgp)
/*
* Check number of open-for-writes on the file and deny execution
* if there are any.
*
* Add a text reference now so no one can write to the
* executable while we're activating it.
*
* Remember if this was set before and unset it in case this is not
* actually an executable image.
*/
error = VOP_GET_WRITECOUNT(vp, &writecount);
error = VOP_SET_TEXT(vp);
if (error != 0)
return (error);
if (writecount != 0)
return (ETXTBSY);
imgp->textset = true;

/*
* Call filesystem specific open routine (which does nothing in the
@@ -81,9 +81,7 @@ static int dirent_exists(struct vnode *vp, const char *dirname,
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

static int vop_stdis_text(struct vop_is_text_args *ap);
static int vop_stdset_text(struct vop_set_text_args *ap);
static int vop_stdunset_text(struct vop_unset_text_args *ap);
static int vop_stdget_writecount(struct vop_get_writecount_args *ap);
static int vop_stdadd_writecount(struct vop_add_writecount_args *ap);
static int vop_stdfdatasync(struct vop_fdatasync_args *ap);
static int vop_stdgetpages_async(struct vop_getpages_async_args *ap);

@@ -141,7 +139,6 @@ struct vop_vector default_vnodeops = {
.vop_is_text = vop_stdis_text,
.vop_set_text = vop_stdset_text,
.vop_unset_text = vop_stdunset_text,
.vop_get_writecount = vop_stdget_writecount,
.vop_add_writecount = vop_stdadd_writecount,
};

@@ -1070,39 +1067,63 @@ static int
vop_stdis_text(struct vop_is_text_args *ap)
{

return ((ap->a_vp->v_vflag & VV_TEXT) != 0);
return (ap->a_vp->v_writecount < 0);
}

static int
int
vop_stdset_text(struct vop_set_text_args *ap)
{
struct vnode *vp;
int error;

ap->a_vp->v_vflag |= VV_TEXT;
return (0);
vp = ap->a_vp;
VI_LOCK(vp);
if (vp->v_writecount > 0) {
error = ETXTBSY;
} else {
vp->v_writecount--;
error = 0;
}
VI_UNLOCK(vp);
return (error);
}

static int
vop_stdunset_text(struct vop_unset_text_args *ap)
{
struct vnode *vp;
int error;

ap->a_vp->v_vflag &= ~VV_TEXT;
return (0);
}

static int
vop_stdget_writecount(struct vop_get_writecount_args *ap)
{

*ap->a_writecount = ap->a_vp->v_writecount;
return (0);
vp = ap->a_vp;
VI_LOCK(vp);
if (vp->v_writecount < 0) {
vp->v_writecount++;
error = 0;
} else {
error = EINVAL;
}
VI_UNLOCK(vp);
return (error);
}

static int
vop_stdadd_writecount(struct vop_add_writecount_args *ap)
{
struct vnode *vp;
int error;

ap->a_vp->v_writecount += ap->a_inc;
return (0);
vp = ap->a_vp;
VI_LOCK(vp);
if (vp->v_writecount < 0) {
error = ETXTBSY;
} else {
VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp,
("neg writecount increment %d", ap->a_inc));
vp->v_writecount += ap->a_inc;
error = 0;
}
VI_UNLOCK(vp);
return (error);
}

/*
@@ -3491,8 +3491,6 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
strlcat(buf, "|VV_ETERNALDEV", sizeof(buf));
if (vp->v_vflag & VV_CACHEDLABEL)
strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf));
if (vp->v_vflag & VV_TEXT)
strlcat(buf, "|VV_TEXT", sizeof(buf));
if (vp->v_vflag & VV_COPYONWRITE)
strlcat(buf, "|VV_COPYONWRITE", sizeof(buf));
if (vp->v_vflag & VV_SYSTEM)

@@ -3508,7 +3506,7 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
if (vp->v_vflag & VV_FORCEINSMQ)
strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf));
flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV |
VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP |
VV_CACHEDLABEL | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP |
VV_NOKNOTE | VV_DELETED | VV_MD | VV_FORCEINSMQ);
if (flags != 0) {
snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags);
@@ -294,6 +294,39 @@ vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags,
return (error);
}

static int
vn_open_vnode_advlock(struct vnode *vp, int fmode, struct file *fp)
{
struct flock lf;
int error, lock_flags, type;

ASSERT_VOP_LOCKED(vp, "vn_open_vnode_advlock");
if ((fmode & (O_EXLOCK | O_SHLOCK)) == 0)
return (0);
KASSERT(fp != NULL, ("open with flock requires fp"));
if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE)
return (EOPNOTSUPP);

lock_flags = VOP_ISLOCKED(vp);
VOP_UNLOCK(vp, 0);

lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = (fmode & O_EXLOCK) != 0 ? F_WRLCK : F_RDLCK;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
if (error == 0)
fp->f_flag |= FHASLOCK;

vn_lock(vp, lock_flags | LK_RETRY);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = ENOENT;
return (error);
}

/*
* Common code for vnode open operations once a vnode is located.
* Check permissions, and call the VOP_OPEN routine.

@@ -303,8 +336,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
struct thread *td, struct file *fp)
{
accmode_t accmode;
struct flock lf;
int error, lock_flags, type;
int error;

if (vp->v_type == VLNK)
return (EMLINK);
@@ -335,63 +367,31 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,

accmode &= ~(VCREAT | VVERIFY);
#endif
if ((fmode & O_CREAT) == 0) {
if (accmode & VWRITE) {
error = vn_writechk(vp);
if (error)
return (error);
}
if (accmode) {
error = VOP_ACCESS(vp, accmode, cred, td);
if (error)
return (error);
}
if ((fmode & O_CREAT) == 0 && accmode != 0) {
error = VOP_ACCESS(vp, accmode, cred, td);
if (error != 0)
return (error);
}
if (vp->v_type == VFIFO && VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
vn_lock(vp, LK_UPGRADE | LK_RETRY);
if ((error = VOP_OPEN(vp, fmode, cred, td, fp)) != 0)
error = VOP_OPEN(vp, fmode, cred, td, fp);
if (error != 0)
return (error);

while ((fmode & (O_EXLOCK | O_SHLOCK)) != 0) {
KASSERT(fp != NULL, ("open with flock requires fp"));
if (fp->f_type != DTYPE_NONE && fp->f_type != DTYPE_VNODE) {
error = EOPNOTSUPP;
break;
error = vn_open_vnode_advlock(vp, fmode, fp);
if (error == 0 && (fmode & FWRITE) != 0) {
error = VOP_ADD_WRITECOUNT(vp, 1);
if (error == 0) {
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
__func__, vp, vp->v_writecount);
}
lock_flags = VOP_ISLOCKED(vp);
VOP_UNLOCK(vp, 0);
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
if (fmode & O_EXLOCK)
lf.l_type = F_WRLCK;
else
lf.l_type = F_RDLCK;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type);
if (error == 0)
fp->f_flag |= FHASLOCK;
vn_lock(vp, lock_flags | LK_RETRY);
if (error != 0)
break;
if ((vp->v_iflag & VI_DOOMED) != 0) {
error = ENOENT;
break;
}

/*
* Another thread might have used this vnode as an
* executable while the vnode lock was dropped.
* Ensure the vnode is still able to be opened for
* writing after the lock has been obtained.
*/
if ((accmode & VWRITE) != 0)
error = vn_writechk(vp);
break;
}

/*
* Error from advlock or VOP_ADD_WRITECOUNT() still requires
* calling VOP_CLOSE() to pair with earlier VOP_OPEN().
* Arrange for that by having fdrop() to use vn_closefile().
*/
if (error != 0) {
fp->f_flag |= FOPENFAILED;
fp->f_vnode = vp;
|
||||
fp->f_ops = &vnops;
|
||||
}
|
||||
vref(vp);
|
||||
} else if ((fmode & FWRITE) != 0) {
|
||||
VOP_ADD_WRITECOUNT(vp, 1);
|
||||
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
|
||||
__func__, vp, vp->v_writecount);
|
||||
}
|
||||
|
||||
ASSERT_VOP_LOCKED(vp, "vn_open_vnode");
|
||||
return (error);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for write permissions on the specified vnode.
|
||||
* Prototype text segments cannot be written.
|
||||
* It is racy.
|
||||
*/
|
||||
int
|
||||
vn_writechk(struct vnode *vp)
|
||||
@ -449,9 +448,7 @@ vn_close1(struct vnode *vp, int flags, struct ucred *file_cred,
|
||||
vn_lock(vp, lock_flags | LK_RETRY);
|
||||
AUDIT_ARG_VNODE1(vp);
|
||||
if ((flags & (FWRITE | FOPENFAILED)) == FWRITE) {
|
||||
VNASSERT(vp->v_writecount > 0, vp,
|
||||
("vn_close: negative writecount"));
|
||||
VOP_ADD_WRITECOUNT(vp, -1);
|
||||
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
|
||||
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
|
||||
__func__, vp, vp->v_writecount);
|
||||
}
|
||||
@ -1319,13 +1316,14 @@ vn_truncate(struct file *fp, off_t length, struct ucred *active_cred,
|
||||
if (error)
|
||||
goto out;
|
||||
#endif
|
||||
error = vn_writechk(vp);
|
||||
error = VOP_ADD_WRITECOUNT(vp, 1);
|
||||
if (error == 0) {
|
||||
VATTR_NULL(&vattr);
|
||||
vattr.va_size = length;
|
||||
if ((fp->f_flag & O_FSYNC) != 0)
|
||||
vattr.va_vaflags |= VA_SYNC;
|
||||
error = VOP_SETATTR(vp, &vattr, fp->f_cred);
|
||||
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
|
||||
}
|
||||
out:
|
||||
VOP_UNLOCK(vp, 0);
|
||||
|
@@ -688,29 +688,21 @@ vop_is_text {
};


%% set_text vp E E E
%% set_text vp = = =

vop_set_text {
IN struct vnode *vp;
};


%% vop_unset_text vp E E E
%% vop_unset_text vp = = =

vop_unset_text {
IN struct vnode *vp;
};


%% get_writecount vp L L L

vop_get_writecount {
IN struct vnode *vp;
OUT int *writecount;
};


%% add_writecount vp E E E
%% add_writecount vp L L L

vop_add_writecount {
IN struct vnode *vp;
@@ -89,6 +89,7 @@ struct image_params {
u_long stack_sz;
struct ucred *newcred; /* new credentials if changing */
bool credential_setid; /* true if becoming setid */
bool textset;
u_int map_flags;
};
@@ -169,7 +169,8 @@ struct vnode {
u_int v_iflag; /* i vnode flags (see below) */
u_int v_vflag; /* v vnode flags */
u_int v_mflag; /* l mnt-specific vnode flags */
int v_writecount; /* v ref count of writers */
int v_writecount; /* I ref count of writers or
(negative) text users */
u_int v_hash;
enum vtype v_type; /* u vnode type */
};

@@ -244,7 +245,6 @@ struct xvnode {
#define VV_NOSYNC 0x0004 /* unlinked, stop syncing */
#define VV_ETERNALDEV 0x0008 /* device that is never destroyed */
#define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */
#define VV_TEXT 0x0020 /* vnode is a pure text prototype */
#define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */
#define VV_SYSTEM 0x0080 /* vnode being used by kernel */
#define VV_PROCDEP 0x0100 /* vnode is process dependent */

@@ -749,6 +749,7 @@ int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
int vop_stdallocate(struct vop_allocate_args *ap);
int vop_stdset_text(struct vop_set_text_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);

@@ -828,6 +829,33 @@ void vop_rename_fail(struct vop_rename_args *ap);

#define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)

#ifdef INVARIANTS
#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) \
do { \
int error_; \
\
error_ = VOP_ADD_WRITECOUNT((vp), (cnt)); \
MPASS(error_ == 0); \
} while (0)
#define VOP_SET_TEXT_CHECKED(vp) \
do { \
int error_; \
\
error_ = VOP_SET_TEXT((vp)); \
MPASS(error_ == 0); \
} while (0)
#define VOP_UNSET_TEXT_CHECKED(vp) \
do { \
int error_; \
\
error_ = VOP_UNSET_TEXT((vp)); \
MPASS(error_ == 0); \
} while (0)
#else
#define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) VOP_ADD_WRITECOUNT((vp), (cnt))
#define VOP_SET_TEXT_CHECKED(vp) VOP_SET_TEXT((vp))
#define VOP_UNSET_TEXT_CHECKED(vp) VOP_UNSET_TEXT((vp))
#endif

void vput(struct vnode *vp);
void vrele(struct vnode *vp);
@@ -338,7 +338,12 @@ ufs_extattr_enable_with_open(struct ufsmount *ump, struct vnode *vp,
return (error);
}

VOP_ADD_WRITECOUNT(vp, 1);
error = VOP_ADD_WRITECOUNT(vp, 1);
if (error != 0) {
VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
VOP_UNLOCK(vp, 0);
return (error);
}
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", __func__, vp,
vp->v_writecount);
@@ -1667,6 +1667,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
if (src_object != dst_object) {
dst_entry->object.vm_object = dst_object;
dst_entry->offset = 0;
dst_entry->eflags &= ~MAP_ENTRY_VN_EXEC;
}
if (fork_charge != NULL) {
KASSERT(dst_entry->cred == NULL,
@@ -506,6 +506,46 @@ _vm_map_lock(vm_map_t map, const char *file, int line)
map->timestamp++;
}

void
vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add)
{
vm_object_t object, object1;
struct vnode *vp;

if ((entry->eflags & MAP_ENTRY_VN_EXEC) == 0)
return;
KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
("Submap with execs"));
object = entry->object.vm_object;
KASSERT(object != NULL, ("No object for text, entry %p", entry));
VM_OBJECT_RLOCK(object);
while ((object1 = object->backing_object) != NULL) {
VM_OBJECT_RLOCK(object1);
VM_OBJECT_RUNLOCK(object);
object = object1;
}

/*
* For OBJT_DEAD objects, v_writecount was handled in
* vnode_pager_dealloc().
*/
if (object->type != OBJT_DEAD) {
KASSERT(((object->flags & OBJ_TMPFS) == 0 &&
object->type == OBJT_VNODE) ||
((object->flags & OBJ_TMPFS) != 0 &&
object->type == OBJT_SWAP),
("vm_map_entry_set_vnode_text: wrong object type, "
"entry %p, object %p, add %d", entry, object, add));
vp = (object->flags & OBJ_TMPFS) == 0 ? object->handle :
object->un_pager.swp.swp_tmpfs;
if (add)
VOP_SET_TEXT_CHECKED(vp);
else
VOP_UNSET_TEXT_CHECKED(vp);
}
VM_OBJECT_RUNLOCK(object);
}

static void
vm_map_process_deferred(void)
{
@@ -518,6 +558,9 @@ vm_map_process_deferred(void)
td->td_map_def_user = NULL;
while (entry != NULL) {
next = entry->next;
MPASS((entry->eflags & (MAP_ENTRY_VN_WRITECNT |
MAP_ENTRY_VN_EXEC)) != (MAP_ENTRY_VN_WRITECNT |
MAP_ENTRY_VN_EXEC));
if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
/*
* Decrement the object's writemappings and

@@ -530,6 +573,7 @@ vm_map_process_deferred(void)
vnode_pager_release_writecount(object, entry->start,
entry->end);
}
vm_map_entry_set_vnode_text(entry, false);
vm_map_entry_deallocate(entry, FALSE);
entry = next;
}

@@ -1372,6 +1416,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
protoeflags |= MAP_ENTRY_GROWS_UP;
if (cow & MAP_VN_WRITECOUNT)
protoeflags |= MAP_ENTRY_VN_WRITECNT;
if (cow & MAP_VN_EXEC)
protoeflags |= MAP_ENTRY_VN_EXEC;
if ((cow & MAP_CREATE_GUARD) != 0)
protoeflags |= MAP_ENTRY_GUARD;
if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)

@@ -1415,7 +1461,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
VM_OBJECT_WUNLOCK(object);
} else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) ==
protoeflags &&
(cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 &&
(cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP |
MAP_VN_EXEC)) == 0 &&
prev_entry->end == start && (prev_entry->cred == cred ||
(prev_entry->object.vm_object != NULL &&
prev_entry->object.vm_object->cred == cred)) &&

@@ -1937,7 +1984,7 @@ vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
* another entry.
*/
#define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \
MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)
MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_VN_EXEC)

static bool
vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry)

@@ -2088,6 +2135,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)

if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
vm_object_reference(new_entry->object.vm_object);
vm_map_entry_set_vnode_text(new_entry, true);
/*
* The object->un_pager.vnp.writemappings for the
* object of MAP_ENTRY_VN_WRITECNT type entry shall be

@@ -2170,6 +2218,7 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)

if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
vm_object_reference(new_entry->object.vm_object);
vm_map_entry_set_vnode_text(new_entry, true);
}
}

@@ -3858,6 +3907,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vnode_pager_update_writecount(object,
new_entry->start, new_entry->end);
}
vm_map_entry_set_vnode_text(new_entry, true);

/*
* Insert the entry into the new map -- we know we're

@@ -3894,6 +3944,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vmspace_map_entry_forked(vm1, vm2, new_entry);
vm_map_copy_entry(old_map, new_map, old_entry,
new_entry, fork_charge);
vm_map_entry_set_vnode_text(new_entry, true);
break;

case VM_INHERIT_ZERO:

@@ -3908,7 +3959,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
new_entry->end = old_entry->end;
new_entry->eflags = old_entry->eflags &
~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
MAP_ENTRY_VN_WRITECNT);
MAP_ENTRY_VN_WRITECNT | MAP_ENTRY_VN_EXEC);
new_entry->protection = old_entry->protection;
new_entry->max_protection = old_entry->max_protection;
new_entry->inheritance = VM_INHERIT_ZERO;
@@ -136,7 +136,7 @@ struct vm_map_entry {
#define MAP_ENTRY_IN_TRANSITION 0x0100 /* entry being changed */
#define MAP_ENTRY_NEEDS_WAKEUP 0x0200 /* waiters in transition */
#define MAP_ENTRY_NOCOREDUMP 0x0400 /* don't include in a core */

#define MAP_ENTRY_VN_EXEC 0x0800 /* text vnode mapping */
#define MAP_ENTRY_GROWS_DOWN 0x1000 /* Top-down stacks */
#define MAP_ENTRY_GROWS_UP 0x2000 /* Bottom-up stacks */

@@ -352,6 +352,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_ACC_NO_CHARGE 0x8000
#define MAP_CREATE_STACK_GAP_UP 0x10000
#define MAP_CREATE_STACK_GAP_DN 0x20000
#define MAP_VN_EXEC 0x40000

/*
* vm_fault option flags

@@ -424,5 +425,6 @@ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int flags);
long vmspace_swap_count(struct vmspace *vmspace);
void vm_map_entry_set_vnode_text(vm_map_entry_t entry, bool add);
#endif /* _KERNEL */
#endif /* _VM_MAP_ */
@@ -1202,14 +1202,13 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
vm_object_t obj;
vm_ooffset_t foff;
struct ucred *cred;
int error, flags, locktype;
int error, flags;
bool writex;

cred = td->td_ucred;
if ((*maxprotp & VM_PROT_WRITE) && (*flagsp & MAP_SHARED))
locktype = LK_EXCLUSIVE;
else
locktype = LK_SHARED;
if ((error = vget(vp, locktype, td)) != 0)
writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
(*flagsp & MAP_SHARED) != 0;
if ((error = vget(vp, LK_SHARED, td)) != 0)
return (error);
AUDIT_ARG_VNODE1(vp);
foff = *foffp;

@@ -1230,11 +1229,11 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
* Bypass filesystems obey the mpsafety of the
* underlying fs. Tmpfs never bypasses.
*/
error = vget(vp, locktype, td);
error = vget(vp, LK_SHARED, td);
if (error != 0)
return (error);
}
if (locktype == LK_EXCLUSIVE) {
if (writex) {
*writecounted = TRUE;
vnode_pager_update_writecount(obj, 0, objsize);
}
@@ -493,33 +493,11 @@ vm_object_vndeallocate(vm_object_t object)
if (!umtx_shm_vnobj_persistent && object->ref_count == 1)
umtx_shm_object_terminated(object);

/*
* The test for text of vp vnode does not need a bypass to
* reach right VV_TEXT there, since it is obtained from
* object->handle.
*/
if (object->ref_count > 1 || (vp->v_vflag & VV_TEXT) == 0) {
object->ref_count--;
VM_OBJECT_WUNLOCK(object);
/* vrele may need the vnode lock. */
vrele(vp);
} else {
vhold(vp);
VM_OBJECT_WUNLOCK(object);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vdrop(vp);
VM_OBJECT_WLOCK(object);
object->ref_count--;
if (object->type == OBJT_DEAD) {
VM_OBJECT_WUNLOCK(object);
VOP_UNLOCK(vp, 0);
} else {
if (object->ref_count == 0)
VOP_UNSET_TEXT(vp);
VM_OBJECT_WUNLOCK(object);
vput(vp);
}
}
object->ref_count--;

/* vrele may need the vnode lock. */
VM_OBJECT_WUNLOCK(object);
vrele(vp);
}

/*
@@ -325,12 +325,23 @@ vnode_pager_dealloc(vm_object_t object)
ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
if (object->un_pager.vnp.writemappings > 0) {
object->un_pager.vnp.writemappings = 0;
VOP_ADD_WRITECOUNT(vp, -1);
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
__func__, vp, vp->v_writecount);
}
vp->v_object = NULL;
VOP_UNSET_TEXT(vp);
VI_LOCK(vp);

/*
* vm_map_entry_set_vnode_text() cannot reach this vnode by
* following object->handle. Clear all text references now.
* This also clears the transient references from
* kern_execve(), which is fine because dead_vnodeops uses nop
* for VOP_UNSET_TEXT().
*/
if (vp->v_writecount < 0)
vp->v_writecount = 0;
VI_UNLOCK(vp);
VM_OBJECT_WUNLOCK(object);
while (refs-- > 0)
vunref(vp);

@@ -1513,13 +1524,13 @@ vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
vp = object->handle;
if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
VOP_ADD_WRITECOUNT(vp, 1);
ASSERT_VOP_LOCKED(vp, "v_writecount inc");
VOP_ADD_WRITECOUNT_CHECKED(vp, 1);
CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
__func__, vp, vp->v_writecount);
} else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
VOP_ADD_WRITECOUNT(vp, -1);
ASSERT_VOP_LOCKED(vp, "v_writecount dec");
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
__func__, vp, vp->v_writecount);
}

@@ -1561,7 +1572,7 @@ vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
VM_OBJECT_WUNLOCK(object);
mp = NULL;
vn_start_write(vp, &mp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vn_lock(vp, LK_SHARED | LK_RETRY);

/*
* Decrement the object's writemappings, by swapping the start