Export a list of VM objects in the system via a sysctl. The list can be

examined via 'vmstat -o'.  It can be used to determine which files are
using physical pages of memory and how much each is using.

Differential Revision:	https://reviews.freebsd.org/D2277
Reviewed by:	alc, kib
MFC after:	2 weeks
Sponsored by:	Norse Corp, Inc. (forward porting to HEAD/10)
This commit is contained in:
jhb 2015-05-27 18:11:05 +00:00
parent b365af7766
commit 4a4be98eae
8 changed files with 469 additions and 5 deletions

View File

@ -10,7 +10,8 @@ SHLIB_MAJOR= 9
SRCS= _secure_path.c auth.c expand_number.c flopen.c fparseln.c gr_util.c \
hexdump.c humanize_number.c kinfo_getfile.c kinfo_getfile.c \
kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c kld.c \
kinfo_getallproc.c kinfo_getproc.c kinfo_getvmmap.c \
kinfo_getvmobject.c kld.c \
login_auth.c login_cap.c \
login_class.c login_crypt.c login_ok.c login_times.c login_tty.c \
pidfile.c property.c pty.c pw_util.c quotafile.c realhostname.c \
@ -27,7 +28,8 @@ CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../libc/gen/
MAN+= expand_number.3 flopen.3 fparseln.3 hexdump.3 \
humanize_number.3 kinfo_getallproc.3 kinfo_getfile.3 \
kinfo_getproc.3 kinfo_getvmmap.3 kld.3 login_auth.3 login_cap.3 \
kinfo_getproc.3 kinfo_getvmmap.3 kinfo_getvmobject.3 kld.3 \
login_auth.3 login_cap.3 \
login_class.3 login_ok.3 login_times.3 login_tty.3 pidfile.3 \
property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \
_secure_path.3 trimdomain.3 uucplock.3 pw_util.3

View File

@ -0,0 +1,74 @@
.\"
.\" Copyright (c) 2015 John Baldwin <jhb@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd May 27, 2015
.Dt KINFO_GETVMOBJECT 3
.Os
.Sh NAME
.Nm kinfo_getvmobject
.Nd function for getting system-wide memory information
.Sh LIBRARY
.Lb libutil
.Sh SYNOPSIS
.In sys/types.h
.In sys/user.h
.In libutil.h
.Ft struct kinfo_vmobject *
.Fn kinfo_getvmobject "int *cntp"
.Sh DESCRIPTION
This function is used to obtain information about the objects using memory
in the system.
.Pp
On success, the number of records in the returned array is stored in the
integer pointed to by
.Ar cntp .
.Pp
This function is a wrapper around the
.Dq vm.objects
.Xr sysctl 3
MIB.
While the kernel returns a packed structure, this function expands the
data into a fixed record format.
.Sh RETURN VALUES
On success the
.Fn kinfo_getvmobject
function returns a pointer to an array of
.Vt struct kinfo_vmobject
structures as defined by
.In sys/user.h .
The array is allocated by an internal call to
.Xr malloc 3
and must be freed by the caller with a call to
.Xr free 3 .
On failure the
.Fn kinfo_getvmobject
function returns
.Dv NULL .
.Sh SEE ALSO
.Xr free 3 ,
.Xr kinfo_getvmmap 3 ,
.Xr malloc 3

View File

@ -0,0 +1,94 @@
/*
* Copyright (c) 2013 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "libutil.h"
/*
 * Fetch the system-wide list of VM objects from the "vm.objects" sysctl.
 *
 * The kernel returns variable-length records; each one is expanded into a
 * full-size struct kinfo_vmobject whose kvo_structsize is rewritten to
 * sizeof(struct kinfo_vmobject).
 *
 * On success returns a calloc()ed array that the caller must free() and
 * stores the number of elements in *cntp.  On failure returns NULL and
 * leaves *cntp untouched.
 */
struct kinfo_vmobject *
kinfo_getvmobject(int *cntp)
{
	char *buf, *bp, *ep;
	struct kinfo_vmobject *kvo, *list, *kp;
	size_t len;
	int cnt, i;

	/*
	 * The object list can grow between the size query and the fetch, in
	 * which case the fetch fails with ENOMEM; retry a bounded number of
	 * times.
	 */
	buf = NULL;
	for (i = 0; i < 3; i++) {
		if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) < 0) {
			/* buf is non-NULL on retry passes; do not leak it. */
			free(buf);
			return (NULL);
		}
		/* reallocf() frees the old buffer itself on failure. */
		buf = reallocf(buf, len);
		if (buf == NULL)
			return (NULL);
		if (sysctlbyname("vm.objects", buf, &len, NULL, 0) == 0)
			goto unpack;
		if (errno != ENOMEM) {
			free(buf);
			return (NULL);
		}
	}
	free(buf);
	return (NULL);

unpack:
	/*
	 * Count items.  Every record length is validated here so that the
	 * copy loop below can neither spin on a zero-length record nor copy
	 * more than one array element's worth of bytes.
	 */
	cnt = 0;
	bp = buf;
	ep = buf + len;
	while (bp < ep) {
		kvo = (struct kinfo_vmobject *)(uintptr_t)bp;
		if ((size_t)(ep - bp) < sizeof(kvo->kvo_structsize) ||
		    kvo->kvo_structsize <= 0 ||
		    (size_t)kvo->kvo_structsize > sizeof(*list) ||
		    (size_t)kvo->kvo_structsize > (size_t)(ep - bp)) {
			free(buf);
			errno = EINVAL;
			return (NULL);
		}
		bp += kvo->kvo_structsize;
		cnt++;
	}

	/* calloc() leaves the unused tail of every expanded record zeroed. */
	list = calloc(cnt, sizeof(*list));
	if (list == NULL) {
		free(buf);
		return (NULL);
	}

	/* Unpack */
	bp = buf;
	kp = list;
	while (bp < ep) {
		kvo = (struct kinfo_vmobject *)(uintptr_t)bp;
		memcpy(kp, kvo, kvo->kvo_structsize);
		bp += kvo->kvo_structsize;
		/* The expanded record is always full size. */
		kp->kvo_structsize = sizeof(*kp);
		kp++;
	}
	free(buf);
	*cntp = cnt;
	return (list);
}

View File

@ -102,6 +102,8 @@ struct kinfo_file *
kinfo_getfile(pid_t _pid, int *_cntp);
struct kinfo_vmentry *
kinfo_getvmmap(pid_t _pid, int *_cntp);
/*
 * Returns an allocated array of VM object records (caller frees) and stores
 * the element count through _cntp; returns NULL on failure.
 */
struct kinfo_vmobject *
kinfo_getvmobject(int *_cntp);
struct kinfo_proc *
kinfo_getallproc(int *_cntp);
struct kinfo_proc *

View File

@ -485,6 +485,27 @@ struct kinfo_vmentry {
char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
};
/*
 * The "vm.objects" sysctl provides a list of all VM objects in the system
 * via a sequence of these entries.
 *
 * The kernel packs each record down to kvo_structsize bytes: the fixed
 * fields plus the NUL-terminated kvo_path, rounded up to a multiple of
 * sizeof(uint64_t).  Consumers of the raw sysctl data must therefore step
 * through the buffer by kvo_structsize, not by sizeof(struct kinfo_vmobject).
 * kinfo_getvmobject(3) expands the records into a fixed-size array.
 */
struct kinfo_vmobject {
int kvo_structsize; /* Variable size of record. */
int kvo_type; /* Object type: KVME_TYPE_*. */
/*
 * NOTE(review): the vm.objects handler fills kvo_size with
 * ptoa(obj->size), which looks like a byte count rather than a page
 * count -- confirm which unit is intended.
 */
uint64_t kvo_size; /* Object size in pages. */
uint64_t kvo_vn_fileid; /* inode number if vnode. */
uint32_t kvo_vn_fsid; /* dev_t of vnode location. */
int kvo_ref_count; /* Reference count. */
int kvo_shadow_count; /* Shadow count. */
int kvo_memattr; /* Memory attribute. */
uint64_t kvo_resident; /* Number of resident pages. */
uint64_t kvo_active; /* Number of active pages. */
uint64_t kvo_inactive; /* Number of inactive pages. */
/* Spare fields; presumably reserved for future expansion -- confirm. */
uint64_t _kvo_qspare[8];
uint32_t _kvo_ispare[8];
/* Must stay last: records are truncated after the path's NUL. */
char kvo_path[PATH_MAX]; /* Pathname, if any. */
};
/*
* The KERN_PROC_KSTACK sysctl allows a process to dump the kernel stacks of
* another process as a series of entries. Each stack is represented by a

View File

@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
@ -2286,6 +2287,142 @@ vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length,
}
}
/*
 * Handler for the "objects" sysctl under _vm: emits one packed
 * struct kinfo_vmobject record for every object on vm_object_list whose
 * type is not OBJT_DEAD.
 *
 * When the caller supplies no output buffer (req->oldptr == NULL) only a
 * size estimate is reported: the full record size multiplied by the current
 * object count, plus 10% slack.
 *
 * Returns 0 on success or the first error returned by SYSCTL_OUT().
 */
static int
sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
{
	struct kinfo_vmobject kvo;
	char *fullpath, *freepath;
	struct vnode *vp;
	struct vattr va;
	vm_object_t obj;
	vm_page_t m;
	int count, error;

	if (req->oldptr == NULL) {
		/*
		 * If an old buffer has not been provided, generate an
		 * estimate of the space needed for a subsequent call.
		 */
		mtx_lock(&vm_object_list_mtx);
		count = 0;
		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
			if (obj->type == OBJT_DEAD)
				continue;
			count++;
		}
		mtx_unlock(&vm_object_list_mtx);
		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
		    count * 11 / 10));
	}

	error = 0;

	/*
	 * VM objects are type stable and are never removed from the
	 * list once added.  This allows us to safely read obj->object_list
	 * after reacquiring vm_object_list_mtx at the bottom of the loop.
	 */
	mtx_lock(&vm_object_list_mtx);
	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
		if (obj->type == OBJT_DEAD)
			continue;
		VM_OBJECT_RLOCK(obj);
		/* Re-check the type now that the object lock is held. */
		if (obj->type == OBJT_DEAD) {
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}
		mtx_unlock(&vm_object_list_mtx);

		/*
		 * Start every record from all-zero bytes.  kvo lives on the
		 * kernel stack and is copied out below, so the spare fields,
		 * any compiler padding and the roundup bytes after the path's
		 * NUL must not carry stale stack contents to userland.  This
		 * also zeroes kvo_active, kvo_inactive, kvo_vn_fileid and
		 * kvo_vn_fsid.
		 */
		bzero(&kvo, sizeof(kvo));
		kvo.kvo_size = ptoa(obj->size);
		kvo.kvo_resident = obj->resident_page_count;
		kvo.kvo_ref_count = obj->ref_count;
		kvo.kvo_shadow_count = obj->shadow_count;
		kvo.kvo_memattr = obj->memattr;
		TAILQ_FOREACH(m, &obj->memq, listq) {
			/*
			 * A page may belong to the object but be
			 * dequeued and set to PQ_NONE while the
			 * object lock is not held.  This makes the
			 * reads of m->queue below racy, and we do not
			 * count pages set to PQ_NONE.  However, this
			 * sysctl is only meant to give an
			 * approximation of the system anyway.
			 */
			if (m->queue == PQ_ACTIVE)
				kvo.kvo_active++;
			else if (m->queue == PQ_INACTIVE)
				kvo.kvo_inactive++;
		}

		freepath = NULL;
		fullpath = "";
		vp = NULL;
		switch (obj->type) {
		case OBJT_DEFAULT:
			kvo.kvo_type = KVME_TYPE_DEFAULT;
			break;
		case OBJT_VNODE:
			kvo.kvo_type = KVME_TYPE_VNODE;
			/*
			 * Take a reference so the vnode can still be used
			 * after the object lock is dropped below.
			 */
			vp = obj->handle;
			vref(vp);
			break;
		case OBJT_SWAP:
			kvo.kvo_type = KVME_TYPE_SWAP;
			break;
		case OBJT_DEVICE:
			kvo.kvo_type = KVME_TYPE_DEVICE;
			break;
		case OBJT_PHYS:
			kvo.kvo_type = KVME_TYPE_PHYS;
			break;
		case OBJT_DEAD:
			kvo.kvo_type = KVME_TYPE_DEAD;
			break;
		case OBJT_SG:
			kvo.kvo_type = KVME_TYPE_SG;
			break;
		case OBJT_MGTDEVICE:
			kvo.kvo_type = KVME_TYPE_MGTDEVICE;
			break;
		default:
			kvo.kvo_type = KVME_TYPE_UNKNOWN;
			break;
		}
		VM_OBJECT_RUNLOCK(obj);

		/* Path and attribute lookups run with no VM locks held. */
		if (vp != NULL) {
			vn_fullpath(curthread, vp, &fullpath, &freepath);
			vn_lock(vp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
				kvo.kvo_vn_fileid = va.va_fileid;
				kvo.kvo_vn_fsid = va.va_fsid;
			}
			/* Pairs with the vref() and vn_lock() above. */
			vput(vp);
		}

		strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		/* Pack record size down */
		kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
		    strlen(kvo.kvo_path) + 1;
		kvo.kvo_structsize = roundup(kvo.kvo_structsize,
		    sizeof(uint64_t));
		error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);

		/*
		 * Retake the list lock before either continuing the walk or
		 * breaking out, so the unlock after the loop always balances.
		 */
		mtx_lock(&vm_object_list_mtx);
		if (error)
			break;
	}
	mtx_unlock(&vm_object_list_mtx);
	return (error);
}
/*
 * Register sysctl_vm_object_list() as the "objects" node under _vm; userland
 * reads it by the name "vm.objects".
 * NOTE(review): the handler only produces output and never consumes a new
 * value, so CTLFLAG_RD may be sufficient here -- confirm.
 * NOTE(review): CTLFLAG_SKIP presumably keeps this large node out of full
 * sysctl tree walks -- confirm.
 */
SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
"List of VM objects");
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

View File

@ -37,7 +37,7 @@
.Sh SYNOPSIS
.Nm
.\" .Op Fl fimst
.Op Fl afHhimPsz
.Op Fl afHhimoPsz
.Op Fl M Ar core Op Fl N Ar system
.Op Fl c Ar count
.Op Fl n Ar devs
@ -119,6 +119,9 @@ Report on the usage of kernel dynamic memory allocated using
by type.
.It Fl n
Change the maximum number of disks to display from the default of 2.
.It Fl o
Display a list of virtual memory objects in the system and the resident
memory used by each object.
.It Fl P
Report per-cpu system/user/idle cpu statistics.
.It Fl p

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resource.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/pcpu.h>
@ -143,12 +144,14 @@ static kvm_t *kd;
#define TIMESTAT 0x10
#define VMSTAT 0x20
#define ZMEMSTAT 0x40
#define OBJSTAT 0x80
static void cpustats(void);
static void pcpustats(int, u_long, int);
static void devstats(void);
static void doforkst(void);
static void dointr(unsigned int, int);
static void doobjstat(void);
static void dosum(void);
static void dovmstat(unsigned int, int);
static void domemstat_malloc(void);
@ -181,7 +184,7 @@ main(int argc, char *argv[])
interval = reps = todo = 0;
maxshowdevs = 2;
hflag = isatty(1);
while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:Pp:stw:z")) != -1) {
while ((c = getopt(argc, argv, "ac:fhHiM:mN:n:oPp:stw:z")) != -1) {
switch (c) {
case 'a':
aflag++;
@ -220,6 +223,9 @@ main(int argc, char *argv[])
errx(1, "number of devices %d is < 0",
maxshowdevs);
break;
case 'o':
todo |= OBJSTAT;
break;
case 'p':
if (devstat_buildmatch(optarg, &matches, &num_matches) != 0)
errx(1, "%s", devstat_errbuf);
@ -320,6 +326,8 @@ main(int argc, char *argv[])
domemstat_zone();
if (todo & SUMSTAT)
dosum();
if (todo & OBJSTAT)
doobjstat();
#ifdef notyet
if (todo & TIMESTAT)
dotimes();
@ -1396,6 +1404,129 @@ domemstat_zone(void)
printf("\n");
}
/*
 * Print one line describing a VM object: resident, active and inactive page
 * counts, reference and shadow counts, a short tag for the memory attribute,
 * a two-letter tag for the object type, and the path (empty if none).
 */
static void
display_object(struct kinfo_vmobject *kvo)
{
	const char *str;

	/* The counters are unsigned 64-bit values, so use %ju. */
	printf("%5ju ", (uintmax_t)kvo->kvo_resident);
	printf("%5ju ", (uintmax_t)kvo->kvo_active);
	printf("%5ju ", (uintmax_t)kvo->kvo_inactive);
	printf("%3d ", kvo->kvo_ref_count);
	printf("%3d ", kvo->kvo_shadow_count);

	/*
	 * Each memory-attribute case is guarded by #ifdef so that only the
	 * VM_MEMATTR_* names that are defined in this build are compiled in.
	 */
	switch (kvo->kvo_memattr) {
#ifdef VM_MEMATTR_UNCACHEABLE
	case VM_MEMATTR_UNCACHEABLE:
		str = "UC";
		break;
#endif
#ifdef VM_MEMATTR_WRITE_COMBINING
	case VM_MEMATTR_WRITE_COMBINING:
		str = "WC";
		break;
#endif
#ifdef VM_MEMATTR_WRITE_THROUGH
	case VM_MEMATTR_WRITE_THROUGH:
		str = "WT";
		break;
#endif
#ifdef VM_MEMATTR_WRITE_PROTECTED
	case VM_MEMATTR_WRITE_PROTECTED:
		str = "WP";
		break;
#endif
#ifdef VM_MEMATTR_WRITE_BACK
	case VM_MEMATTR_WRITE_BACK:
		str = "WB";
		break;
#endif
#ifdef VM_MEMATTR_WEAK_UNCACHEABLE
	case VM_MEMATTR_WEAK_UNCACHEABLE:
		str = "UC-";
		break;
#endif
#ifdef VM_MEMATTR_WB_WA
	case VM_MEMATTR_WB_WA:
		str = "WB";
		break;
#endif
#ifdef VM_MEMATTR_NOCACHE
	case VM_MEMATTR_NOCACHE:
		str = "NC";
		break;
#endif
#ifdef VM_MEMATTR_DEVICE
	case VM_MEMATTR_DEVICE:
		str = "DEV";
		break;
#endif
#ifdef VM_MEMATTR_CACHEABLE
	case VM_MEMATTR_CACHEABLE:
		str = "C";
		break;
#endif
#ifdef VM_MEMATTR_PREFETCHABLE
	case VM_MEMATTR_PREFETCHABLE:
		str = "PRE";
		break;
#endif
	default:
		str = "??";
		break;
	}
	printf("%-3s ", str);

	switch (kvo->kvo_type) {
	case KVME_TYPE_NONE:
		str = "--";
		break;
	case KVME_TYPE_DEFAULT:
		str = "df";
		break;
	case KVME_TYPE_VNODE:
		str = "vn";
		break;
	case KVME_TYPE_SWAP:
		str = "sw";
		break;
	case KVME_TYPE_DEVICE:
		str = "dv";
		break;
	case KVME_TYPE_PHYS:
		str = "ph";
		break;
	case KVME_TYPE_DEAD:
		str = "dd";
		break;
	case KVME_TYPE_SG:
		str = "sg";
		break;
#ifdef KVME_TYPE_MGTDEVICE
	/*
	 * The vm.objects handler reports managed device objects with this
	 * type; guarded so the file still builds against a <sys/user.h>
	 * that does not define it.
	 */
	case KVME_TYPE_MGTDEVICE:
		str = "md";
		break;
#endif
	case KVME_TYPE_UNKNOWN:
	default:
		str = "??";
		break;
	}
	printf("%-2s ", str);
	printf("%-s\n", kvo->kvo_path);
}
static void
doobjstat(void)
{
struct kinfo_vmobject *kvo;
int cnt, i;
kvo = kinfo_getvmobject(&cnt);
if (kvo == NULL) {
warn("Failed to fetch VM object list");
return;
}
printf("%5s %5s %5s %3s %3s %3s %2s %s\n", "RES", "ACT", "INACT",
"REF", "SHD", "CM", "TP", "PATH");
for (i = 0; i < cnt; i++)
display_object(&kvo[i]);
free(kvo);
}
/*
* kread reads something from the kernel, given its nlist index.
*/
@ -1448,7 +1579,7 @@ static void
usage(void)
{
(void)fprintf(stderr, "%s%s",
"usage: vmstat [-afHhimPsz] [-M core [-N system]] [-c count] [-n devs]\n",
"usage: vmstat [-afHhimoPsz] [-M core [-N system]] [-c count] [-n devs]\n",
" [-p type,if,pass] [-w wait] [disks] [wait [count]]\n");
exit(1);
}