Add two new sysctls for the forthcoming procstat(1), backing its -f and -v
arguments:

kern.proc.filedesc - dump file descriptor information for a process, if
  debugging is permitted, including socket addresses, open flags, file
  offsets, file paths, etc.

kern.proc.vmmap - dump virtual memory mapping information for a process,
  if debugging is permitted, including layout and information on
  underlying objects, such as the type of object and path.

These provide a superset of the information historically available
through the now-deprecated procfs(4), and are intended to be exported
in an ABI-robust form.
This commit is contained in:
rwatson 2007-12-02 10:10:27 +00:00
parent 663523e681
commit c25458da37
4 changed files with 434 additions and 3 deletions

View File

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@ -59,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
@ -68,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <security/audit/audit.h>
@ -2405,7 +2408,7 @@ filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, s
}
/*
* Get file structures.
* Get file structures globally.
*/
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
@ -2488,6 +2491,182 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
/*
* Get per-process file descriptors for use by procstat(1), et al.
*/
static int
sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
{
char *fullpath, *freepath;
struct kinfo_file *kif;
struct filedesc *fdp;
int error, i, *name;
struct socket *so;
struct vnode *vp;
struct file *fp;
struct proc *p;
int vfslocked;
name = (int *)arg1;
if ((p = pfind((pid_t)name[0])) == NULL)
return (ESRCH);
if ((error = p_candebug(curthread, p))) {
PROC_UNLOCK(p);
return (error);
}
fdp = fdhold(p);
PROC_UNLOCK(p);
kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
FILEDESC_SLOCK(fdp);
for (i = 0; i < fdp->fd_nfiles; i++) {
if ((fp = fdp->fd_ofiles[i]) == NULL)
continue;
bzero(kif, sizeof(*kif));
kif->kf_structsize = sizeof(*kif);
FILE_LOCK(fp);
vp = NULL;
so = NULL;
kif->kf_fd = i;
switch (fp->f_type) {
case DTYPE_VNODE:
kif->kf_type = KF_TYPE_VNODE;
vp = fp->f_vnode;
vref(vp);
break;
case DTYPE_SOCKET:
kif->kf_type = KF_TYPE_SOCKET;
so = fp->f_data;
break;
case DTYPE_PIPE:
kif->kf_type = KF_TYPE_PIPE;
break;
case DTYPE_FIFO:
kif->kf_type = KF_TYPE_FIFO;
vp = fp->f_vnode;
vref(vp);
break;
case DTYPE_KQUEUE:
kif->kf_type = KF_TYPE_KQUEUE;
break;
case DTYPE_CRYPTO:
kif->kf_type = KF_TYPE_CRYPTO;
break;
case DTYPE_MQUEUE:
kif->kf_type = KF_TYPE_MQUEUE;
break;
default:
kif->kf_type = KF_TYPE_UNKNOWN;
break;
}
kif->kf_ref_count = fp->f_count;
if (fp->f_flag & FREAD)
kif->kf_flags |= KF_FLAG_READ;
if (fp->f_flag & FWRITE)
kif->kf_flags |= KF_FLAG_WRITE;
if (fp->f_flag & FAPPEND)
kif->kf_flags |= KF_FLAG_APPEND;
if (fp->f_flag & FASYNC)
kif->kf_flags |= KF_FLAG_ASYNC;
if (fp->f_flag & FFSYNC)
kif->kf_flags |= KF_FLAG_FSYNC;
if (fp->f_flag & FNONBLOCK)
kif->kf_flags |= KF_FLAG_NONBLOCK;
if (fp->f_flag & O_DIRECT)
kif->kf_flags |= KF_FLAG_DIRECT;
if (fp->f_flag & FHASLOCK)
kif->kf_flags |= KF_FLAG_HASLOCK;
kif->kf_offset = fp->f_offset;
FILE_UNLOCK(fp);
if (vp != NULL) {
switch (vp->v_type) {
case VNON:
kif->kf_vnode_type = KF_VTYPE_VNON;
break;
case VREG:
kif->kf_vnode_type = KF_VTYPE_VREG;
break;
case VDIR:
kif->kf_vnode_type = KF_VTYPE_VDIR;
break;
case VBLK:
kif->kf_vnode_type = KF_VTYPE_VBLK;
break;
case VCHR:
kif->kf_vnode_type = KF_VTYPE_VCHR;
break;
case VLNK:
kif->kf_vnode_type = KF_VTYPE_VLNK;
break;
case VSOCK:
kif->kf_vnode_type = KF_VTYPE_VSOCK;
break;
case VFIFO:
kif->kf_vnode_type = KF_VTYPE_VFIFO;
break;
case VBAD:
kif->kf_vnode_type = KF_VTYPE_VBAD;
break;
default:
kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
break;
}
/*
* It is OK to drop the filedesc lock here as we will
* re-validate and re-evaluate its properties when
* the loop continues.
*/
freepath = NULL;
fullpath = "-";
FILEDESC_SUNLOCK(fdp);
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
vn_fullpath(curthread, vp, &fullpath, &freepath);
vput(vp);
VFS_UNLOCK_GIANT(vfslocked);
strlcpy(kif->kf_path, fullpath,
sizeof(kif->kf_path));
if (freepath != NULL)
free(freepath, M_TEMP);
FILEDESC_SLOCK(fdp);
}
if (so != NULL) {
struct sockaddr *sa;
if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
== 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
bcopy(sa, &kif->kf_sa_local, sa->sa_len);
free(sa, M_SONAME);
}
if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
== 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
free(sa, M_SONAME);
}
kif->kf_sock_domain =
so->so_proto->pr_domain->dom_family;
kif->kf_sock_type = so->so_type;
kif->kf_sock_protocol = so->so_proto->pr_protocol;
}
error = SYSCTL_OUT(req, kif, sizeof(*kif));
if (error)
break;
}
FILEDESC_SUNLOCK(fdp);
fddrop(fdp);
free(kif, M_TEMP);
return (0);
}
/*
 * Register the read-only "filedesc" node under _kern_proc with MIB number
 * KERN_PROC_FILEDESC, handled by sysctl_kern_proc_filedesc().
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
sysctl_kern_proc_filedesc, "Process filedesc entries");
#ifdef DDB
/*
* For the purposes of debugging, generate a human-readable string for the

View File

@ -32,6 +32,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_kstack_pages.h"
@ -40,9 +41,11 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/sysent.h>
#include <sys/sched.h>
#include <sys/smp.h>
@ -60,10 +63,15 @@ __FBSDID("$FreeBSD$");
#include <sys/ktrace.h>
#endif
#ifdef DDB
#include <ddb/ddb.h>
#endif
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/uma.h>
MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
@ -1284,8 +1292,155 @@ sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
return (sysctl_handle_string(oidp, sv_name, 0, req));
}
/*
 * Get per-process VM map entries for use by procstat(1), et al.
 *
 * arg1 is treated as an int array whose first element is the target pid.
 * The calling thread must pass p_candebug() against the target process.
 * One struct kinfo_vmentry is copied out with SYSCTL_OUT() for every map
 * entry that is not a submap.
 *
 * Returns ESRCH if the pid is not found, the p_candebug() error if debugging
 * is refused, the first SYSCTL_OUT() error if copy-out fails, and 0
 * otherwise.
 */
static int
sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
{
	vm_map_entry_t entry, tmp_entry;
	unsigned int last_timestamp;
	char *fullpath, *freepath;
	struct kinfo_vmentry *kve;
	int error, *name;
	struct vnode *vp;
	struct proc *p;
	vm_map_t map;

	name = (int *)arg1;
	if ((p = pfind((pid_t)name[0])) == NULL)
		return (ESRCH);
	if ((error = p_candebug(curthread, p))) {
		PROC_UNLOCK(p);
		return (error);
	}
	/* Hold the process across the walk; released by PRELE() below. */
	_PHOLD(p);
	PROC_UNLOCK(p);

	/* A single scratch record is reused for every map entry. */
	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);

	map = &p->p_vmspace->vm_map;	/* XXXRW: More locking required? */
	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		vm_offset_t addr;
		int vfslocked;

		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			continue;

		bzero(kve, sizeof(*kve));
		kve->kve_structsize = sizeof(*kve);

		/*
		 * The top object's resident pages are reported as private
		 * only when its shadow_count is exactly 1.
		 */
		kve->kve_private_resident = 0;
		obj = entry->object.vm_object;
		if (obj != NULL) {
			VM_OBJECT_LOCK(obj);
			if (obj->shadow_count == 1)
				kve->kve_private_resident =
				    obj->resident_page_count;
		}

		/* Count pages in the entry that the pmap can translate. */
		kve->kve_resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(map->pmap, addr))
				kve->kve_resident++;
			addr += PAGE_SIZE;
		}

		/*
		 * Walk the backing_object chain to its end.  obj stays locked
		 * throughout; intermediate objects are locked hand-over-hand,
		 * leaving lobj as the last object in the chain.
		 */
		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_LOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);
			lobj = tobj;
		}

		freepath = NULL;
		fullpath = "";
		if (lobj) {
			vp = NULL;
			switch(lobj->type) {
			case OBJT_DEFAULT:
				kve->kve_type = KVME_TYPE_DEFAULT;
				break;
			case OBJT_VNODE:
				kve->kve_type = KVME_TYPE_VNODE;
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				kve->kve_type = KVME_TYPE_SWAP;
				break;
			case OBJT_DEVICE:
				kve->kve_type = KVME_TYPE_DEVICE;
				break;
			case OBJT_PHYS:
				kve->kve_type = KVME_TYPE_PHYS;
				break;
			case OBJT_DEAD:
				kve->kve_type = KVME_TYPE_DEAD;
				break;
			default:
				kve->kve_type = KVME_TYPE_UNKNOWN;
				break;
			}
			if (lobj != obj)
				VM_OBJECT_UNLOCK(lobj);

			/* Counts are taken from the top object, not lobj. */
			kve->kve_ref_count = obj->ref_count;
			kve->kve_shadow_count = obj->shadow_count;
			VM_OBJECT_UNLOCK(obj);
			if (vp != NULL) {
				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY,
				    curthread);
				vn_fullpath(curthread, vp, &fullpath,
				    &freepath);
				/* vput() pairs with vn_lock() and vref(). */
				vput(vp);
				VFS_UNLOCK_GIANT(vfslocked);
			}
		} else {
			kve->kve_type = KVME_TYPE_NONE;
			kve->kve_ref_count = 0;
			kve->kve_shadow_count = 0;
		}

		kve->kve_start = (void*)entry->start;
		kve->kve_end = (void*)entry->end;

		if (entry->protection & VM_PROT_READ)
			kve->kve_protection |= KVME_PROT_READ;
		if (entry->protection & VM_PROT_WRITE)
			kve->kve_protection |= KVME_PROT_WRITE;
		if (entry->protection & VM_PROT_EXECUTE)
			kve->kve_protection |= KVME_PROT_EXEC;

		if (entry->eflags & MAP_ENTRY_COW)
			kve->kve_flags |= KVME_FLAG_COW;
		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;

		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
		if (freepath != NULL)
			free(freepath, M_TEMP);

		/*
		 * The map lock is dropped around the copy-out.  If the map's
		 * timestamp is not exactly one past the saved value after
		 * relocking, the entry pointer is re-derived by looking up
		 * addr - 1, where addr was advanced past the entry above.
		 */
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);
		error = SYSCTL_OUT(req, kve, sizeof(*kve));
		vm_map_lock_read(map);
		if (error)
			break;
		if (last_timestamp + 1 != map->timestamp) {
			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
			entry = tmp_entry;
		}
	}
	vm_map_unlock_read(map);
	PRELE(p);
	free(kve, M_TEMP);
	return (error);
}
SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table");
SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
0, 0, sysctl_kern_proc, "S,proc", "Return entire process table");
@ -1353,3 +1508,6 @@ static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
CTLFLAG_RD, sysctl_kern_proc, "Return process table, no threads");
/*
 * Register the read-only "vmmap" node under _kern_proc with MIB number
 * KERN_PROC_VMMAP, handled by sysctl_kern_proc_vmmap().
 */
static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD,
sysctl_kern_proc_vmmap, "Process vm map entries");

View File

@ -456,6 +456,8 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
#define KERN_PROC_RGID 10 /* by real group id */
#define KERN_PROC_GID 11 /* by effective group id */
#define KERN_PROC_PATHNAME 12 /* path to executable */
#define KERN_PROC_VMMAP 13 /* VM map entries for process */
#define KERN_PROC_FILEDESC 14 /* File descriptors for process */
#define KERN_PROC_INC_THREAD 0x10 /*
* modifier for pid, pgrp, tty,
* uid, ruid, gid, rgid and proc
@ -619,6 +621,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
extern struct sysctl_oid_list sysctl__children;
SYSCTL_DECL(_kern);
SYSCTL_DECL(_kern_ipc);
SYSCTL_DECL(_kern_proc);
SYSCTL_DECL(_sysctl);
SYSCTL_DECL(_vm);
SYSCTL_DECL(_vm_stats);

View File

@ -1,6 +1,8 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1991, 1993
* The Regents of the University of California. All rights reserved.
* The Regents of the University of California.
* Copyright (c) 2007 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -56,6 +58,9 @@
#ifndef _SYS_SIGNALVAR_H_
#include <sys/signalvar.h>
#endif
#ifndef _SYS_SOCKET_VAR_H_
#include <sys/socket.h>
#endif
/*
* KERN_PROC subtype ops return arrays of selected proc structure entries:
@ -228,4 +233,90 @@ struct user {
struct kinfo_proc u_kproc; /* eproc */
};
/*
* The KERN_PROC_FILEDESC sysctl allows a process to dump the file descriptor
* array of another process.
*/
/* Descriptor types, stored in kinfo_file.kf_type. */
#define KF_TYPE_NONE 0
#define KF_TYPE_VNODE 1
#define KF_TYPE_SOCKET 2
#define KF_TYPE_PIPE 3
#define KF_TYPE_FIFO 4
#define KF_TYPE_KQUEUE 5
#define KF_TYPE_CRYPTO 6
#define KF_TYPE_MQUEUE 7
#define KF_TYPE_UNKNOWN 255
/* Vnode types, stored in kinfo_file.kf_vnode_type. */
#define KF_VTYPE_VNON 0
#define KF_VTYPE_VREG 1
#define KF_VTYPE_VDIR 2
#define KF_VTYPE_VBLK 3
#define KF_VTYPE_VCHR 4
#define KF_VTYPE_VLNK 5
#define KF_VTYPE_VSOCK 6
#define KF_VTYPE_VFIFO 7
#define KF_VTYPE_VBAD 8
#define KF_VTYPE_UNKNOWN 255
/* Open-file flag bits, OR-ed into kinfo_file.kf_flags. */
#define KF_FLAG_READ 0x00000001
#define KF_FLAG_WRITE 0x00000002
#define KF_FLAG_APPEND 0x00000004
#define KF_FLAG_ASYNC 0x00000008
#define KF_FLAG_FSYNC 0x00000010
#define KF_FLAG_NONBLOCK 0x00000020
#define KF_FLAG_DIRECT 0x00000040
#define KF_FLAG_HASLOCK 0x00000080
/*
 * One record per open file descriptor.  The vnode fields (kf_vnode_type,
 * kf_path) are filled only for descriptors backed by a vnode, and the socket
 * fields (kf_sock_*, kf_sa_*) only for sockets; unused fields are left zero.
 */
struct kinfo_file {
int kf_structsize; /* Size of kinfo_file. */
int kf_type; /* Descriptor type (KF_TYPE_*). */
int kf_fd; /* Array index. */
int kf_ref_count; /* Reference count. */
int kf_flags; /* Flags (KF_FLAG_*). */
off_t kf_offset; /* Seek location. */
int kf_vnode_type; /* Vnode type (KF_VTYPE_*). */
int kf_sock_domain; /* Socket domain. */
int kf_sock_type; /* Socket type. */
int kf_sock_protocol; /* Socket protocol. */
char kf_path[PATH_MAX]; /* Path to file, if any. */
struct sockaddr_storage kf_sa_local; /* Socket address. */
struct sockaddr_storage kf_sa_peer; /* Peer address. */
};
/*
* The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of
* another process as a series of entries.
*/
/* Backing object types, stored in kinfo_vmentry.kve_type. */
#define KVME_TYPE_NONE 0
#define KVME_TYPE_DEFAULT 1
#define KVME_TYPE_VNODE 2
#define KVME_TYPE_SWAP 3
#define KVME_TYPE_DEVICE 4
#define KVME_TYPE_PHYS 5
#define KVME_TYPE_DEAD 6
#define KVME_TYPE_UNKNOWN 255
/* Protection bits, OR-ed into kinfo_vmentry.kve_protection. */
#define KVME_PROT_READ 0x00000001
#define KVME_PROT_WRITE 0x00000002
#define KVME_PROT_EXEC 0x00000004
/* Map entry flag bits, OR-ed into kinfo_vmentry.kve_flags. */
#define KVME_FLAG_COW 0x00000001
#define KVME_FLAG_NEEDS_COPY 0x00000002
/*
 * One record per VM map entry.  The trailing _kve_pspare and _kve_ispare
 * arrays are reserved space for later additions.
 */
struct kinfo_vmentry {
int kve_structsize; /* Size of kinfo_vmentry. */
int kve_type; /* Type of map entry (KVME_TYPE_*). */
void *kve_start; /* Starting pointer. */
void *kve_end; /* Finishing pointer. */
int kve_flags; /* Flags on map entry (KVME_FLAG_*). */
int kve_resident; /* Number of resident pages. */
int kve_private_resident; /* Number of private pages. */
int kve_protection; /* Protection bitmask (KVME_PROT_*). */
int kve_ref_count; /* VM obj ref count. */
int kve_shadow_count; /* VM obj shadow count. */
char kve_path[PATH_MAX]; /* Path to VM obj, if any. */
void *_kve_pspare[8]; /* Space for more stuff. */
int _kve_ispare[8]; /* Space for more stuff. */
};
#endif