Close race between vmspace_exitfree() and exit1() and races between
vmspace_exitfree() and vmspace_free() which could result in the same
vmspace being freed twice.
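
The underlying defect is a check-then-act race across two counters: each
path atomically dropped its own counter and then read the other counter
without synchronization, so two racing threads could both conclude they
held the last reference and both call vmspace_dofree().  As a hedged
illustration (a userspace reduction, not the kernel code: the counter
names mirror vm_refcnt/vm_exitingcnt and the checks mirror the old
vmspace_free()/vmspace_exitfree() conditions shown in the diff below;
everything else is made up):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int refcnt = 1;		/* stands in for vm_refcnt */
	static atomic_int exitingcnt = 1;	/* stands in for vm_exitingcnt */

	/* Mimics old vmspace_free(): drop refcnt, then peek at exitingcnt. */
	static void *
	free_side(void *arg)
	{
		(void)arg;
		if (atomic_fetch_sub(&refcnt, 1) == 1 &&
		    atomic_load(&exitingcnt) == 0)
			printf("free_side would vmspace_dofree()\n");
		return (NULL);
	}

	/* Mimics old vmspace_exitfree(): drop exitingcnt, then peek at refcnt. */
	static void *
	exitfree_side(void *arg)
	{
		(void)arg;
		if (atomic_fetch_sub(&exitingcnt, 1) == 1 &&
		    atomic_load(&refcnt) == 0)
			printf("exitfree_side would vmspace_dofree()\n");
		return (NULL);
	}

	int
	main(void)
	{
		pthread_t t1, t2;

		pthread_create(&t1, NULL, free_side, NULL);
		pthread_create(&t2, NULL, exitfree_side, NULL);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		/* If both decrements land before both loads, both lines
		   print: a double free. */
		return (0);
	}

The fix removes the two-counter dance: vm_exitingcnt goes away and
vm_refcnt becomes the only counter, manipulated with CAS loops that
never race past zero.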

Factor out part of exit1() into the new function vmspace_exit().  Attach
exiting processes to vmspace0 so that the old vmspace can be freed
earlier.

Add a new function, vmspace_acquire_ref(), for obtaining a reference to
the vmspace of another process.  Avoid changing the vmspace refcount
from 0 to 1, since that could also lead to the same vmspace being freed
twice.
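
As a usage sketch (hedged: inspect_map() and its body are illustrative
and not part of this commit; only vmspace_acquire_ref(), vmspace_free()
and the vm_map read-lock calls are real), the consumer pattern mirrors
the vmtotal() change below:

	/* Sketch: safely examine another process's map. */
	static void
	inspect_map(struct proc *p)
	{
		struct vmspace *vm;

		vm = vmspace_acquire_ref(p);
		if (vm == NULL)		/* no vmspace, or refcnt was 0 */
			return;
		vm_map_lock_read(&vm->vm_map);
		/* ... walk vm->vm_map entries ... */
		vm_map_unlock_read(&vm->vm_map);
		vmspace_free(vm);	/* drop the acquired reference */
	}

Note the re-check inside vmspace_acquire_ref(): after bumping the
refcount it verifies that p->p_vmspace still points at the same
vmspace, and backs out via vmspace_free() if the process switched
vmspaces in the meantime.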

Change vmtotal() and swapout_procs() to use vmspace_acquire_ref().

Reviewed by:	alc
tegge 2006-05-29 21:28:56 +00:00
parent ff05f5f32b
commit 0d5f191162
6 changed files with 104 additions and 53 deletions

sys/kern/kern_exit.c

@@ -113,14 +113,13 @@ exit1(struct thread *td, int rv)
 	struct proc *p, *nq, *q;
 	struct tty *tp;
 	struct vnode *ttyvp;
-	struct vmspace *vm;
 	struct vnode *vtmp;
 #ifdef KTRACE
 	struct vnode *tracevp;
 	struct ucred *tracecred;
 #endif
 	struct plimit *plim;
-	int locked, refcnt;
+	int locked;
 
 	/*
 	 * Drop Giant if caller has it. Eventually we should warn about
@@ -300,33 +299,7 @@ exit1(struct thread *td, int rv)
 	}
 	mtx_unlock(&ppeers_lock);
 
-	/* The next two chunks should probably be moved to vmspace_exit. */
-	vm = p->p_vmspace;
-	/*
-	 * Release user portion of address space.
-	 * This releases references to vnodes,
-	 * which could cause I/O if the file has been unlinked.
-	 * Need to do this early enough that we can still sleep.
-	 * Can't free the entire vmspace as the kernel stack
-	 * may be mapped within that space also.
-	 *
-	 * Processes sharing the same vmspace may exit in one order, and
-	 * get cleaned up by vmspace_exit() in a different order.  The
-	 * last exiting process to reach this point releases as much of
-	 * the environment as it can, and the last process cleaned up
-	 * by vmspace_exit() (which decrements exitingcnt) cleans up the
-	 * remainder.
-	 */
-	atomic_add_int(&vm->vm_exitingcnt, 1);
-	do
-		refcnt = vm->vm_refcnt;
-	while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
-	if (refcnt == 1) {
-		shmexit(vm);
-		pmap_remove_pages(vmspace_pmap(vm));
-		(void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map),
-		    vm_map_max(&vm->vm_map));
-	}
+	vmspace_exit(td);
 
 	sx_xlock(&proctree_lock);
 	if (SESS_LEADER(p)) {

sys/vm/vm_extern.h

@@ -78,6 +78,8 @@ struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t);
 struct vmspace *vmspace_fork(struct vmspace *);
 void vmspace_exec(struct proc *, vm_offset_t, vm_offset_t);
 void vmspace_unshare(struct proc *);
+void vmspace_exit(struct thread *);
+struct vmspace *vmspace_acquire_ref(struct proc *);
 void vmspace_free(struct vmspace *);
 void vmspace_exitfree(struct proc *);
 void vnode_pager_setsize(struct vnode *, vm_ooffset_t);

sys/vm/vm_glue.c

@@ -852,12 +852,9 @@ int action;
 			 * process may attempt to alter
 			 * the map.
 			 */
-			PROC_LOCK(p);
-			vm = p->p_vmspace;
-			KASSERT(vm != NULL,
-			    ("swapout_procs: a process has no address space"));
-			atomic_add_int(&vm->vm_refcnt, 1);
-			PROC_UNLOCK(p);
+			vm = vmspace_acquire_ref(p);
+			if (vm == NULL)
+				continue;
 			if (!vm_map_trylock(&vm->vm_map))
 				goto nextproc1;
 

sys/vm/vm_map.c

@@ -148,6 +148,13 @@ static void vm_map_zdtor(void *mem, int size, void *arg);
 static void vmspace_zdtor(void *mem, int size, void *arg);
 #endif
+
+/*
+ * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
+ * stable.
+ */
+#define PROC_VMSPACE_LOCK(p) do { } while (0)
+#define PROC_VMSPACE_UNLOCK(p) do { } while (0)
 
 void
 vm_map_startup(void)
 {
@@ -261,7 +268,6 @@ vmspace_alloc(min, max)
 	vm->vm_taddr = 0;
 	vm->vm_daddr = 0;
 	vm->vm_maxsaddr = 0;
-	vm->vm_exitingcnt = 0;
 	return (vm);
 }
 
@@ -313,7 +319,7 @@ vmspace_free(struct vmspace *vm)
 	do
 		refcnt = vm->vm_refcnt;
 	while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
-	if (refcnt == 1 && vm->vm_exitingcnt == 0)
+	if (refcnt == 1)
 		vmspace_dofree(vm);
 }
 
@@ -321,28 +327,93 @@ void
 vmspace_exitfree(struct proc *p)
 {
 	struct vmspace *vm;
-	int exitingcnt;
 
+	PROC_VMSPACE_LOCK(p);
 	vm = p->p_vmspace;
 	p->p_vmspace = NULL;
+	PROC_VMSPACE_UNLOCK(p);
+	KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
+	vmspace_free(vm);
+}
+
+void
+vmspace_exit(struct thread *td)
+{
+	int refcnt;
+	struct vmspace *vm;
+	struct proc *p;
 
 	/*
-	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
-	 * may not be 0 (e.g. fork() and child exits without exec()ing).
-	 * exitingcnt may increment above 0 and drop back down to zero
-	 * several times while vm_refcnt is held non-zero.  vm_refcnt
-	 * may also increment above 0 and drop back down to zero several
-	 * times while vm_exitingcnt is held non-zero.
+	 * Release user portion of address space.
+	 * This releases references to vnodes,
+	 * which could cause I/O if the file has been unlinked.
+	 * Need to do this early enough that we can still sleep.
 	 *
-	 * The last wait on the exiting child's vmspace will clean up
-	 * the remainder of the vmspace.
+	 * The last exiting process to reach this point releases as
+	 * much of the environment as it can.  vmspace_dofree() is the
+	 * slower fallback in case another process had a temporary
+	 * reference to the vmspace.
 	 */
-	do
-		exitingcnt = vm->vm_exitingcnt;
-	while (!atomic_cmpset_int(&vm->vm_exitingcnt, exitingcnt,
-	    exitingcnt - 1));
-	if (vm->vm_refcnt == 0 && exitingcnt == 1)
+	p = td->td_proc;
+	vm = p->p_vmspace;
+	atomic_add_int(&vmspace0.vm_refcnt, 1);
+	do {
+		refcnt = vm->vm_refcnt;
+		if (refcnt > 1 && p->p_vmspace != &vmspace0) {
+			/* Switch now since other proc might free vmspace */
+			PROC_VMSPACE_LOCK(p);
+			p->p_vmspace = &vmspace0;
+			PROC_VMSPACE_UNLOCK(p);
+			pmap_activate(td);
+		}
+	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
+	if (refcnt == 1) {
+		if (p->p_vmspace != vm) {
+			/* vmspace not yet freed, switch back */
+			PROC_VMSPACE_LOCK(p);
+			p->p_vmspace = vm;
+			PROC_VMSPACE_UNLOCK(p);
+			pmap_activate(td);
+		}
+		pmap_remove_pages(vmspace_pmap(vm));
+		/* Switch now since this proc will free vmspace */
+		PROC_VMSPACE_LOCK(p);
+		p->p_vmspace = &vmspace0;
+		PROC_VMSPACE_UNLOCK(p);
+		pmap_activate(td);
 		vmspace_dofree(vm);
+	}
+}
+
+/* Acquire reference to vmspace owned by another process. */
+struct vmspace *
+vmspace_acquire_ref(struct proc *p)
+{
+	struct vmspace *vm;
+	int refcnt;
+
+	PROC_VMSPACE_LOCK(p);
+	vm = p->p_vmspace;
+	if (vm == NULL) {
+		PROC_VMSPACE_UNLOCK(p);
+		return (NULL);
+	}
+	do {
+		refcnt = vm->vm_refcnt;
+		if (refcnt <= 0) {	/* Avoid 0->1 transition */
+			PROC_VMSPACE_UNLOCK(p);
+			return (NULL);
+		}
+	} while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
+	if (vm != p->p_vmspace) {
+		PROC_VMSPACE_UNLOCK(p);
+		vmspace_free(vm);
+		return (NULL);
+	}
+	PROC_VMSPACE_UNLOCK(p);
+	return (vm);
 }
 
 void
@@ -2923,7 +2994,9 @@ vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
 	 * run it down.  Even though there is little or no chance of blocking
 	 * here, it is a good idea to keep this form for future mods.
 	 */
+	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = newvmspace;
+	PROC_VMSPACE_UNLOCK(p);
 	if (p == curthread->td_proc)		/* XXXKSE ? */
 		pmap_activate(curthread);
 	vmspace_free(oldvmspace);
@@ -2942,7 +3015,9 @@ vmspace_unshare(struct proc *p)
 	if (oldvmspace->vm_refcnt == 1)
 		return;
 	newvmspace = vmspace_fork(oldvmspace);
+	PROC_VMSPACE_LOCK(p);
 	p->p_vmspace = newvmspace;
+	PROC_VMSPACE_UNLOCK(p);
 	if (p == curthread->td_proc)		/* XXXKSE ? */
 		pmap_activate(curthread);
 	vmspace_free(oldvmspace);

sys/vm/vm_map.h

@@ -242,7 +242,6 @@ struct vmspace {
 	caddr_t vm_taddr;	/* (c) user virtual address of text */
 	caddr_t vm_daddr;	/* (c) user virtual address of data */
 	caddr_t vm_maxsaddr;	/* user VA at max stack growth */
-	int	vm_exitingcnt;	/* several processes zombied in exit1 */
 	int	vm_refcnt;	/* number of references */
 };
 

sys/vm/vm_meter.c

@@ -115,6 +115,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 	vm_map_t map;
 	int paging;
 	struct thread *td;
+	struct vmspace *vm;
 
 	totalp = &total;
 	bzero(totalp, sizeof *totalp);
@@ -185,7 +186,10 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 		 * Note active objects.
 		 */
 		paging = 0;
-		map = &p->p_vmspace->vm_map;
+		vm = vmspace_acquire_ref(p);
+		if (vm == NULL)
+			continue;
+		map = &vm->vm_map;
 		vm_map_lock_read(map);
 		for (entry = map->header.next;
 		    entry != &map->header; entry = entry->next) {
@@ -198,6 +202,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 			VM_OBJECT_UNLOCK(object);
 		}
 		vm_map_unlock_read(map);
+		vmspace_free(vm);
 		if (paging)
 			totalp->t_pw++;
 	}