The vmtotal sysctl handler marks active vm objects to calculate

statistics.  Marking is done by setting the OBJ_ACTIVE flag.  The
flags change is locked, but the problem is that many parts of the system
assume that vm object initialization ensures that no other code could
change the object, and thus perform the initialization without locking.
The end result is corrupted flags in vm objects, most visibly a spurious
OBJ_DEAD flag, causing random hangs.

Avoid the active object marking; instead, provide an equally inexact but
immutable is_object_active() definition for the object mapped state.

Avoid iterating over the processes' mappings altogether by using an
arguably improved definition of the paging thread as one which sleeps
on the v_free_count.

PR:	204764
Diagnosed by:	pho
Tested by:	pho (previous version)
Reviewed by:	alc
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Approved by:	re (gjb)
This commit is contained in:
kib 2016-06-21 17:49:33 +00:00
parent 1f3d721c13
commit 907f8ff806
2 changed files with 37 additions and 46 deletions

View File

@ -89,29 +89,31 @@ SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT | CTLFLAG_RD |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
"Machine loadaverage history");
/*
 * Cheap, advisory test for whether a vm object is mapped, that is,
 * referenced by some vm_map_entry.  An object is reported as active
 * when its reference count is larger than its shadow count.  The
 * answer is inexact, since objects occasionally hold transient
 * references that do not correspond to a mapping, but the check has
 * very low overhead and is good enough for the vm.vmtotal sysctl.
 */
static bool
is_object_active(vm_object_t obj)
{

	return (obj->shadow_count < obj->ref_count);
}
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
struct proc *p;
struct vmtotal total;
vm_map_entry_t entry;
vm_object_t object;
vm_map_t map;
int paging;
struct proc *p;
struct thread *td;
struct vmspace *vm;
bzero(&total, sizeof(total));
/*
* Mark all objects as inactive.
*/
mtx_lock(&vm_object_list_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
VM_OBJECT_WLOCK(object);
vm_object_clear_flag(object, OBJ_ACTIVE);
VM_OBJECT_WUNLOCK(object);
}
mtx_unlock(&vm_object_list_mtx);
/*
* Calculate process statistics.
*/
@ -132,11 +134,15 @@ vmtotal(SYSCTL_HANDLER_ARGS)
case TDS_INHIBITED:
if (TD_IS_SWAPPED(td))
total.t_sw++;
else if (TD_IS_SLEEPING(td) &&
td->td_priority <= PZERO)
total.t_dw++;
else
total.t_sl++;
else if (TD_IS_SLEEPING(td)) {
if (td->td_priority <= PZERO)
total.t_dw++;
else
total.t_sl++;
if (td->td_wchan ==
&vm_cnt.v_free_count)
total.t_pw++;
}
break;
case TDS_CAN_RUN:
@ -154,29 +160,6 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
PROC_UNLOCK(p);
/*
* Note active objects.
*/
paging = 0;
vm = vmspace_acquire_ref(p);
if (vm == NULL)
continue;
map = &vm->vm_map;
vm_map_lock_read(map);
for (entry = map->header.next;
entry != &map->header; entry = entry->next) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
(object = entry->object.vm_object) == NULL)
continue;
VM_OBJECT_WLOCK(object);
vm_object_set_flag(object, OBJ_ACTIVE);
paging |= object->paging_in_progress;
VM_OBJECT_WUNLOCK(object);
}
vm_map_unlock_read(map);
vmspace_free(vm);
if (paging)
total.t_pw++;
}
sx_sunlock(&allproc_lock);
/*
@ -202,9 +185,18 @@ vmtotal(SYSCTL_HANDLER_ARGS)
*/
continue;
}
if (object->ref_count == 1 &&
(object->flags & OBJ_NOSPLIT) != 0) {
/*
* Also skip otherwise unreferenced swap
* objects backing tmpfs vnodes, and POSIX or
* SysV shared memory.
*/
continue;
}
total.t_vm += object->size;
total.t_rm += object->resident_page_count;
if (object->flags & OBJ_ACTIVE) {
if (is_object_active(object)) {
total.t_avm += object->size;
total.t_arm += object->resident_page_count;
}
@ -212,7 +204,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/* shared object */
total.t_vmshr += object->size;
total.t_rmshr += object->resident_page_count;
if (object->flags & OBJ_ACTIVE) {
if (is_object_active(object)) {
total.t_avmshr += object->size;
total.t_armshr += object->resident_page_count;
}

View File

@ -182,7 +182,6 @@ struct vm_object {
*/
#define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */
#define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */
#define OBJ_ACTIVE 0x0004 /* active objects */
#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */
#define OBJ_NOSPLIT 0x0010 /* dont split this object */
#define OBJ_UMTXDEAD 0x0020 /* umtx pshared was terminated */