From 389d2b6e21823a73e17a36e29f71ed56771ccdc7 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 15 Dec 2002 18:50:04 +0000 Subject: [PATCH] Fix a refcount race with the vmspace structure. In order to prevent resource starvation we clean up as much of the vmspace structure as we can when the last process using it exits. The rest of the structure is cleaned up when the process is reaped. But since exit1() decrements the ref count it is possible for a double-free to occur if someone else, such as the process swapout code, references and then dereferences the structure. Additionally, the final cleanup of the structure should not occur until the last process referencing it is reaped. This commit solves the problem by introducing a secondary reference count, called 'vm_exitingcnt'. The normal reference count is decremented on exit and vm_exitingcnt is incremented. vm_exitingcnt is decremented when the process is reaped. When both vm_exitingcnt and vm_refcnt are 0, the structure is freed for real. MFC after: 3 weeks --- sys/kern/kern_exit.c | 9 ++++++++- sys/vm/vm_map.c | 23 +++++++++++++++++------ sys/vm/vm_map.h | 4 ++-- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 38d8092da06e..8737bed37c32 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -287,7 +287,15 @@ exit1(td, rv) * Need to do this early enough that we can still sleep. * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. + * + * Processes sharing the same vmspace may exit in one order, and + * get cleaned up by vmspace_exitfree() in a different order. The + * last exiting process to reach this point releases as much of + * the environment as it can, and the last process cleaned up + * by vmspace_exitfree() (which decrements exitingcnt) cleans up the + * remainder. 
*/ + ++vm->vm_exitingcnt; if (--vm->vm_refcnt == 0) { if (vm->vm_shm) shmexit(p); @@ -297,7 +305,6 @@ exit1(td, rv) vm_page_unlock_queues(); (void) vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), vm_map_max(&vm->vm_map)); - vm->vm_freer = p; } sx_xlock(&proctree_lock); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index b7f6ac0cf951..d51bd2d9399d 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -258,7 +258,7 @@ vmspace_alloc(min, max) vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ vm->vm_refcnt = 1; vm->vm_shm = NULL; - vm->vm_freer = NULL; + vm->vm_exitingcnt = 0; return (vm); } @@ -304,7 +304,7 @@ vmspace_free(struct vmspace *vm) if (vm->vm_refcnt == 0) panic("vmspace_free: attempt to free already freed vmspace"); - if (--vm->vm_refcnt == 0) + if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0) vmspace_dofree(vm); } @@ -314,11 +314,22 @@ vmspace_exitfree(struct proc *p) struct vmspace *vm; GIANT_REQUIRED; - if (p == p->p_vmspace->vm_freer) { - vm = p->p_vmspace; - p->p_vmspace = NULL; + vm = p->p_vmspace; + p->p_vmspace = NULL; + + /* + * cleanup by parent process wait()ing on exiting child. vm_refcnt + * may not be 0 (e.g. fork() and child exits without exec()ing). + * exitingcnt may increment above 0 and drop back down to zero + * several times while vm_refcnt is held non-zero. vm_refcnt + * may also increment above 0 and drop back down to zero several + * times while vm_exitingcnt is held non-zero. + * + * The last wait on the exiting child's vmspace will clean up + * the remainder of the vmspace. 
+ */ + if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0) vmspace_dofree(vm); - } } /* diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 3ef827c688dd..2f629c67eb8d 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -218,8 +218,8 @@ struct vmspace { caddr_t vm_taddr; /* (c) user virtual address of text */ caddr_t vm_daddr; /* (c) user virtual address of data */ caddr_t vm_maxsaddr; /* user VA at max stack growth */ -#define vm_endcopy vm_freer - struct proc *vm_freer; /* vm freed on whose behalf */ +#define vm_endcopy vm_exitingcnt + int vm_exitingcnt; /* several processes zombied in exit1 */ }; #ifdef _KERNEL