Rework and simplify the ksyms(4) implementation.

- Store the symbol table contents in an anonymous swap-backed object. Have
  mmap(/dev/ksyms) map that object, and stop mapping the symbol table into
  the calling process in ksyms_open(). Previously we would cache a pointer
  to the pmap of the opening process, and mmap(/dev/ksyms) would create a
  mapping using the physical address found by a pmap lookup at the initial
  mapping address. However, this assumes that the cached pmap is valid,
  which may not be the case. [1]
- Remove the ksyms ioctl interface. It appears to have been added to work
  around a limitation in libelf that no longer exists; see r321842.
  Moreover, the interface is difficult to support and isn't present in
  illumos. Since ksyms was added specifically to support lockstat(1), it
  is expected that this removal won't have any real impact.
- Simplify ksyms_read() to avoid unnecessary copying.
- Don't call the device handle destructor if we fail to capture a snapshot
  of the kernel's symbol table. devfs will do that for us.

Reported by:	Ilja van Sprundel <ivansprundel@ioactive.com> [1]
Reviewed by:	kib (previous revision)
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D11789
This commit is contained in:
Mark Johnston 2017-08-03 00:38:13 +00:00
parent 3cf56bb43a
commit 22e406c80b
5 changed files with 94 additions and 239 deletions

View File

@ -38,6 +38,9 @@
# xargs -n1 | sort | uniq -d;
# done
# 20170802: ksyms(4) ioctl interface was removed
OLD_FILES+=usr/include/sys/ksyms.h
# 20170722: new clang import which bumps version from 4.0.0 to 5.0.0.
OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/allocator_interface.h
OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/asan_interface.h

View File

@ -48,7 +48,6 @@
#include <kstat.h>
#else
#include <sys/elf.h>
#include <sys/ksyms.h>
#include <sys/param.h>
#include <sys/module.h>
#include <sys/linker.h>

View File

@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd April 5, 2009
.Dd August 2, 2017
.Dt KSYMS 4
.Os
.Sh NAME
@ -69,24 +69,6 @@ driver does not block the loading or unloading of modules into the kernel
while the
.Pa /dev/ksyms
file is open but may contain stale data.
.Sh IOCTLS
The
.Xr ioctl 2
command codes below are defined in
.Aq Pa sys/ksyms.h .
.Pp
The (third) argument to the
.Xr ioctl 2
should be a pointer to the type indicated.
.Bl -tag -width indent -offset indent
.It Dv KIOCGSIZE (size_t)
Returns the total size of the current symbol table.
This can be used when allocating a buffer to make a copy of
the kernel symbol table.
.It Dv KIOCGADDR (void *)
Returns the address of the kernel symbol table mapped in
the process memory.
.El
.Sh FILES
.Bl -tag -width /dev/ksymsX
.It Pa /dev/ksyms
@ -112,7 +94,6 @@ This may occur if the kernel was in the process of loading or
unloading a module.
.El
.Sh SEE ALSO
.Xr ioctl 2 ,
.Xr nlist 3 ,
.Xr elf 5 ,
.Xr kldload 8
@ -152,12 +133,3 @@ file once at a time.
The process must close the
.Pa /dev/ksyms
before it is allowed to open it again.
.Pp
The
.Nm
driver uses the calling process' memory address space to store the snapshot.
.Xr ioctl 2
can be used to get the memory address where the symbol table is stored to
save kernel memory.
.Xr mmap 2
may also be used but it will map it to another address.

View File

@ -32,16 +32,15 @@
#include <sys/conf.h>
#include <sys/elf.h>
#include <sys/ksyms.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/elf.h>
@ -49,7 +48,7 @@
#include <vm/pmap.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include "linker_if.h"
@ -68,18 +67,14 @@
static d_open_t ksyms_open;
static d_read_t ksyms_read;
static d_close_t ksyms_close;
static d_ioctl_t ksyms_ioctl;
static d_mmap_t ksyms_mmap;
static d_mmap_single_t ksyms_mmap_single;
static struct cdevsw ksyms_cdevsw = {
.d_version = D_VERSION,
.d_flags = D_TRACKCLOSE,
.d_open = ksyms_open,
.d_close = ksyms_close,
.d_read = ksyms_read,
.d_ioctl = ksyms_ioctl,
.d_mmap = ksyms_mmap,
.d_mmap_single = ksyms_mmap_single,
.d_name = KSYMS_DNAME
};
@ -87,11 +82,12 @@ struct ksyms_softc {
LIST_ENTRY(ksyms_softc) sc_list;
vm_offset_t sc_uaddr;
size_t sc_usize;
pmap_t sc_pmap;
vm_object_t sc_obj;
vm_size_t sc_objsz;
struct proc *sc_proc;
};
static struct mtx ksyms_mtx;
static struct sx ksyms_mtx;
static struct cdev *ksyms_dev;
static LIST_HEAD(, ksyms_softc) ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);
@ -112,6 +108,7 @@ struct tsizes {
};
struct toffsets {
struct ksyms_softc *to_sc;
vm_offset_t to_symoff;
vm_offset_t to_stroff;
unsigned to_stridx;
@ -155,10 +152,24 @@ ksyms_size_calc(struct tsizes *ts)
(void)linker_file_foreach(ksyms_size_permod, ts);
}
#define KSYMS_EMIT(src, des, sz) do { \
copyout(src, (void *)des, sz); \
des += sz; \
} while (0)
static int
ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
{
struct iovec iov;
struct uio uio;
iov.iov_base = buf;
iov.iov_len = sz;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = off;
uio.uio_resid = (ssize_t)sz;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_rw = UIO_WRITE;
uio.uio_td = curthread;
return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio));
}
#define SYMBLKSZ (256 * sizeof(Elf_Sym))
@ -170,24 +181,24 @@ static int
ksyms_add(linker_file_t lf, void *arg)
{
char *buf;
struct ksyms_softc *sc;
struct toffsets *to;
const Elf_Sym *symtab;
Elf_Sym *symp;
caddr_t strtab;
long symsz;
size_t strsz, numsyms;
size_t len, numsyms, strsz, symsz;
linker_symval_t symval;
int i, nsyms, len;
int error, i, nsyms;
buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
to = arg;
sc = to->to_sc;
MOD_SLOCK;
numsyms = LINKER_SYMTAB_GET(lf, &symtab);
strsz = LINKER_STRTAB_GET(lf, &strtab);
symsz = numsyms * sizeof(Elf_Sym);
buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
while (symsz > 0) {
len = min(SYMBLKSZ, symsz);
bcopy(symtab, buf, len);
@ -213,7 +224,13 @@ ksyms_add(linker_file_t lf, void *arg)
return (ENXIO);
}
to->to_resid -= len;
KSYMS_EMIT(buf, to->to_symoff, len);
error = ksyms_emit(sc, buf, to->to_symoff, len);
to->to_symoff += len;
if (error != 0) {
MOD_SUNLOCK;
free(buf, M_KSYMS);
return (error);
}
symtab += nsyms;
symsz -= len;
@ -224,10 +241,11 @@ ksyms_add(linker_file_t lf, void *arg)
if (strsz > to->to_resid)
return (ENXIO);
to->to_resid -= strsz;
KSYMS_EMIT(strtab, to->to_stroff, strsz);
error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
to->to_stroff += strsz;
to->to_stridx += strsz;
return (0);
return (error);
}
/*
@ -236,11 +254,11 @@ ksyms_add(linker_file_t lf, void *arg)
* 0 on success, otherwise error.
*/
static int
ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid)
ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
{
struct ksyms_hdr *hdr;
struct toffsets to;
int error = 0;
struct ksyms_hdr *hdr;
int error;
hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
@ -334,39 +352,41 @@ ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid)
/* Copy shstrtab into the header. */
bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset;
to.to_sc = sc;
to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
to.to_stridx = 0;
if (sizeof(struct ksyms_hdr) > resid) {
free(hdr, M_KSYMS);
return (ENXIO);
}
to.to_resid = resid - sizeof(struct ksyms_hdr);
to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);
/* emit header */
copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr));
error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
free(hdr, M_KSYMS);
if (error != 0)
return (error);
/* Add symbol and string tables for each kernel module. */
error = linker_file_foreach(ksyms_add, &to);
if (error != 0)
return (error);
if (to.to_resid != 0)
return (ENXIO);
return (error);
return (0);
}
static void
ksyms_cdevpriv_dtr(void *data)
{
struct ksyms_softc *sc;
vm_object_t obj;
sc = (struct ksyms_softc *)data;
mtx_lock(&ksyms_mtx);
sx_xlock(&ksyms_mtx);
LIST_REMOVE(sc, sc_list);
mtx_unlock(&ksyms_mtx);
sx_xunlock(&ksyms_mtx);
obj = sc->sc_obj;
if (obj != NULL)
vm_object_deallocate(obj);
free(sc, M_KSYMS);
}
@ -375,34 +395,31 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
{
struct tsizes ts;
struct ksyms_softc *sc;
size_t total_elf_sz;
vm_size_t elfsz;
int error, try;
/*
* Limit one open() per process. The process must close()
* before open()'ing again.
*/
mtx_lock(&ksyms_mtx);
sx_xlock(&ksyms_mtx);
LIST_FOREACH(sc, &ksyms_list, sc_list) {
if (sc->sc_proc == td->td_proc) {
mtx_unlock(&ksyms_mtx);
sx_xunlock(&ksyms_mtx);
return (EBUSY);
}
}
sc = malloc(sizeof(*sc), M_KSYMS, M_NOWAIT | M_ZERO);
if (sc == NULL) {
mtx_unlock(&ksyms_mtx);
return (ENOMEM);
}
sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
sc->sc_proc = td->td_proc;
sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap;
LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
mtx_unlock(&ksyms_mtx);
sx_xunlock(&ksyms_mtx);
error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
if (error != 0)
goto failed;
if (error != 0) {
ksyms_cdevpriv_dtr(sc);
return (error);
}
/*
* MOD_SLOCK doesn't work here (because of a lock reversal with
@ -412,32 +429,20 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
* time.
*/
for (try = 0; try < 3; try++) {
/*
* Map a buffer in the calling process memory space and
* create a snapshot of the kernel symbol table in it.
*/
/* Compute the size of buffer needed. */
ksyms_size_calc(&ts);
total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz +
ts.ts_strsz;
elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz;
error = copyout_map(td, &sc->sc_uaddr, (vm_size_t)total_elf_sz);
if (error != 0)
break;
sc->sc_usize = total_elf_sz;
sc->sc_obj = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(round_page(elfsz)));
sc->sc_objsz = elfsz;
error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz);
error = ksyms_snapshot(sc, &ts);
if (error == 0)
/* successful snapshot */
return (0);
break;
/* Snapshot failed, unmap the memory and try again. */
(void)copyout_unmap(td, sc->sc_uaddr, sc->sc_usize);
vm_object_deallocate(sc->sc_obj);
sc->sc_obj = NULL;
}
failed:
ksyms_cdevpriv_dtr(sc);
return (error);
}
@ -445,122 +450,35 @@ static int
ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
{
struct ksyms_softc *sc;
char *buf;
off_t off;
size_t len, sz;
vm_size_t ubase;
int error;
error = devfs_get_cdevpriv((void **)&sc);
if (error != 0)
return (error);
return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio));
}
static int
ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
vm_object_t *objp, int nprot)
{
struct ksyms_softc *sc;
vm_object_t obj;
int error;
error = devfs_get_cdevpriv((void **)&sc);
if (error != 0)
return (error);
off = uio->uio_offset;
len = uio->uio_resid;
if (*offset < 0 || *offset >= round_page(sc->sc_objsz) ||
size > round_page(sc->sc_objsz) - *offset ||
(nprot & ~PROT_READ) != 0)
return (EINVAL);
if (off < 0 || off > sc->sc_usize)
return (EFAULT);
if (len > sc->sc_usize - off)
len = sc->sc_usize - off;
if (len == 0)
obj = sc->sc_obj;
vm_object_reference(obj);
*objp = obj;
return (0);
/*
* Since the snapshot buffer is in the user space we have to copy it
* in to the kernel and then back out. The extra copy saves valuable
* kernel memory.
*/
buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK);
ubase = sc->sc_uaddr + off;
while (len) {
sz = min(PAGE_SIZE, len);
if (copyin((void *)ubase, buf, sz) != 0)
error = EFAULT;
else
error = uiomove(buf, sz, uio);
if (error != 0)
break;
len -= sz;
ubase += sz;
}
free(buf, M_KSYMS);
return (error);
}
static int
ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused,
struct thread *td __unused)
{
struct ksyms_softc *sc;
int error;
error = devfs_get_cdevpriv((void **)&sc);
if (error != 0)
return (error);
switch (cmd) {
case KIOCGSIZE:
/*
* Return the size (in bytes) of the symbol table
* snapshot.
*/
*(size_t *)data = sc->sc_usize;
break;
case KIOCGADDR:
/*
* Return the address of the symbol table snapshot.
* XXX - compat32 version of this?
*/
*(void **)data = (void *)sc->sc_uaddr;
break;
default:
error = ENOTTY;
break;
}
return (error);
}
static int
ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
int prot __unused, vm_memattr_t *memattr __unused)
{
struct ksyms_softc *sc;
int error;
error = devfs_get_cdevpriv((void **)&sc);
if (error != 0)
return (error);
/*
* XXX mmap() will actually map the symbol table into the process
* address space again.
*/
if (offset > round_page(sc->sc_usize) ||
(*paddr = pmap_extract(sc->sc_pmap,
(vm_offset_t)sc->sc_uaddr + offset)) == 0)
return (-1);
return (0);
}
static int
ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused,
struct thread *td)
{
struct ksyms_softc *sc;
int error;
error = devfs_get_cdevpriv((void **)&sc);
if (error != 0)
return (error);
/* Unmap the buffer from the process address space. */
return (copyout_unmap(td, sc->sc_uaddr, sc->sc_usize));
}
static int
@ -571,7 +489,7 @@ ksyms_modevent(module_t mod __unused, int type, void *data __unused)
error = 0;
switch (type) {
case MOD_LOAD:
mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF);
sx_init(&ksyms_mtx, "KSyms mtx");
ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
0400, KSYMS_DNAME);
break;
@ -579,7 +497,7 @@ ksyms_modevent(module_t mod __unused, int type, void *data __unused)
if (!LIST_EMPTY(&ksyms_list))
return (EBUSY);
destroy_dev(ksyms_dev);
mtx_destroy(&ksyms_mtx);
sx_destroy(&ksyms_mtx);
break;
case MOD_SHUTDOWN:
break;

View File

@ -1,37 +0,0 @@
/*-
* Copyright (c) 2008-2009, Stacey Son <sson@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_KSYMS_H_
#define _SYS_KSYMS_H_
#include <sys/ioccom.h>
#define KIOCGSIZE _IOR('l', 1, size_t)
#define KIOCGADDR _IOR('l', 2, void *)
#endif /* _SYS_KSYMS_H_ */