Add new vnode dumper to support live minidumps
This dumper can instantiate and write the dump's contents to a file-backed vnode. Unlike existing disk or network dumpers, the vnode dumper should not be invoked during a system panic, and therefore is not added to the global dumper_configs list. Instead, the vnode dumper is constructed ad-hoc when a live dump is requested using the new ioctl on /dev/mem. This is similar in spirit to a kgdb session against the live system via /dev/mem. As described briefly in the mem(4) man page, live dumps are not guaranteed to result in a usable output file, but offer some debugging value where forcefully panicking a system to dump its memory is not desirable/feasible. A future change to savecore(8) will add an option to save a live dump. Reviewed by: markj, Pau Amma <pauamma@gundo.com> (manpages) Discussed with: kib MFC after: 3 weeks Sponsored by: Juniper Networks, Inc. Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D33813
This commit is contained in:
parent
59c27ea18c
commit
c9114f9f86
@ -202,6 +202,50 @@ to update an existing or establish a new range, or to
|
||||
.Dv MEMRANGE_SET_REMOVE
|
||||
to remove a range.
|
||||
.El
|
||||
.Ss Live Kernel Dumps
|
||||
.Pp
|
||||
The
|
||||
.Dv MEM_KERNELDUMP
|
||||
ioctl will initiate a kernel dump against the running system, the contents of
|
||||
which will be written to a process-owned file descriptor.
|
||||
The resulting dump output will be in minidump format.
|
||||
The request is described by
|
||||
.Bd -literal
|
||||
struct mem_livedump_arg {
|
||||
int fd; /* input */
|
||||
int flags; /* input */
|
||||
uint8_t compression; /* input */
|
||||
};
|
||||
.Ed
|
||||
.Pp
|
||||
The
|
||||
.Va fd
|
||||
field is used to pass the file descriptor.
|
||||
.Pp
|
||||
The
|
||||
.Va flags
|
||||
field is currently unused and must be set to zero.
|
||||
.Pp
|
||||
The
|
||||
.Va compression
|
||||
field can be used to specify the desired compression to
|
||||
be applied to the dump output.
|
||||
The supported values are defined in
|
||||
.In sys/kerneldump.h ;
|
||||
that is,
|
||||
.Dv KERNELDUMP_COMP_NONE ,
|
||||
.Dv KERNELDUMP_COMP_GZIP ,
|
||||
or
|
||||
.Dv KERNELDUMP_COMP_ZSTD .
|
||||
.Pp
|
||||
Kernel dumps taken against the running system may have inconsistent kernel data
|
||||
structures due to allocation, deallocation, or modification of memory
|
||||
concurrent to the dump procedure.
|
||||
Thus, the resulting core dump is not guaranteed to be usable.
|
||||
A system under load is more likely to produce an inconsistent result.
|
||||
Despite this, live kernel dumps can be useful for offline debugging of certain
|
||||
types of kernel bugs, such as deadlocks, or in inspecting a particular part of
|
||||
the system's state.
|
||||
.Sh RETURN VALUES
|
||||
.Ss MEM_EXTRACT_PADDR
|
||||
The
|
||||
@ -229,6 +273,24 @@ base/length supplied.
|
||||
An attempt to remove a range failed because the range is permanently
|
||||
enabled.
|
||||
.El
|
||||
.Ss MEM_KERNELDUMP
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EOPNOTSUPP
|
||||
Kernel minidumps are not supported on this architecture.
|
||||
.It Bq Er EPERM
|
||||
An attempt to begin the kernel dump failed because the calling thread lacks the
|
||||
.Dv PRIV_KMEM_READ
|
||||
privilege.
|
||||
.It Bq Er EBADF
|
||||
The supplied file descriptor was invalid, or does not have write permission.
|
||||
.It Bq Er EBUSY
|
||||
An attempt to begin the kernel dump failed because one is already in progress.
|
||||
.It Bq Er EINVAL
|
||||
An invalid or unsupported value was specified in
|
||||
.Va flags .
|
||||
.It Bq Er EINVAL
|
||||
An invalid or unsupported compression type was specified.
|
||||
.El
|
||||
.Sh FILES
|
||||
.Bl -tag -width /dev/kmem -compact
|
||||
.It Pa /dev/mem
|
||||
|
@ -3839,6 +3839,7 @@ kern/kern_tslog.c optional tslog
|
||||
kern/kern_ubsan.c optional kubsan
|
||||
kern/kern_umtx.c standard
|
||||
kern/kern_uuid.c standard
|
||||
kern/kern_vnodedumper.c standard
|
||||
kern/kern_xxx.c standard
|
||||
kern/link_elf.c standard
|
||||
kern/linker_if.m standard
|
||||
|
@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/ioccom.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/kerneldump.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/memrange.h>
|
||||
@ -96,6 +97,7 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
|
||||
{
|
||||
vm_map_t map;
|
||||
vm_map_entry_t entry;
|
||||
const struct mem_livedump_arg *marg;
|
||||
struct mem_extract *me;
|
||||
int error;
|
||||
|
||||
@ -120,6 +122,10 @@ memioctl(struct cdev *dev, u_long cmd, caddr_t data, int flags,
|
||||
}
|
||||
vm_map_unlock_read(map);
|
||||
break;
|
||||
case MEM_KERNELDUMP:
|
||||
marg = (const struct mem_livedump_arg *)data;
|
||||
error = livedump_start(marg->fd, marg->flags, marg->compression);
|
||||
break;
|
||||
default:
|
||||
error = memioctl_md(dev, cmd, data, flags, td);
|
||||
break;
|
||||
|
@ -390,6 +390,17 @@ print_uptime(void)
|
||||
printf("%lds\n", (long)ts.tv_sec);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up a context that can be extracted from the dump.
|
||||
*/
|
||||
void
|
||||
dump_savectx(void)
|
||||
{
|
||||
|
||||
savectx(&dumppcb);
|
||||
dumptid = curthread->td_tid;
|
||||
}
|
||||
|
||||
int
|
||||
doadump(boolean_t textdump)
|
||||
{
|
||||
@ -402,8 +413,7 @@ doadump(boolean_t textdump)
|
||||
if (TAILQ_EMPTY(&dumper_configs))
|
||||
return (ENXIO);
|
||||
|
||||
savectx(&dumppcb);
|
||||
dumptid = curthread->td_tid;
|
||||
dump_savectx();
|
||||
dumping++;
|
||||
|
||||
coredump = TRUE;
|
||||
|
202
sys/kern/kern_vnodedumper.c
Normal file
202
sys/kern/kern_vnodedumper.c
Normal file
@ -0,0 +1,202 @@
|
||||
/*-
|
||||
* Copyright (c) 2021-2022 Juniper Networks
|
||||
*
|
||||
* This software was developed by Mitchell Horne <mhorne@FreeBSD.org>
|
||||
* under sponsorship from Juniper Networks and Klara Systems.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/caprights.h>
|
||||
#include <sys/disk.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/kerneldump.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/namei.h>
|
||||
#include <sys/priv.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vnode.h>
|
||||
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
static dumper_start_t vnode_dumper_start;
|
||||
static dumper_t vnode_dump;
|
||||
static dumper_hdr_t vnode_write_headers;
|
||||
|
||||
static struct sx livedump_sx;
|
||||
SX_SYSINIT(livedump, &livedump_sx, "Livedump sx");
|
||||
|
||||
/*
|
||||
* Invoke a live minidump on the system.
|
||||
*/
|
||||
int
|
||||
livedump_start(int fd, int flags, uint8_t compression)
|
||||
{
|
||||
#if MINIDUMP_PAGE_TRACKING == 1
|
||||
struct dumperinfo di, *livedi;
|
||||
struct diocskerneldump_arg kda;
|
||||
struct vnode *vp;
|
||||
struct file *fp;
|
||||
void *rl_cookie;
|
||||
int error;
|
||||
|
||||
error = priv_check(curthread, PRIV_KMEM_READ);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
if (flags != 0)
|
||||
return (EINVAL);
|
||||
|
||||
error = getvnode(curthread, fd, &cap_write_rights, &fp);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
vp = fp->f_vnode;
|
||||
|
||||
if ((fp->f_flag & FWRITE) == 0) {
|
||||
error = EBADF;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* Set up a new dumper. */
|
||||
bzero(&di, sizeof(di));
|
||||
di.dumper_start = vnode_dumper_start;
|
||||
di.dumper = vnode_dump;
|
||||
di.dumper_hdr = vnode_write_headers;
|
||||
di.blocksize = PAGE_SIZE; /* Arbitrary. */
|
||||
di.maxiosize = MAXDUMPPGS * PAGE_SIZE;
|
||||
|
||||
bzero(&kda, sizeof(kda));
|
||||
kda.kda_compression = compression;
|
||||
error = dumper_create(&di, "livedump", &kda, &livedi);
|
||||
if (error != 0)
|
||||
goto drop;
|
||||
|
||||
/* Only allow one livedump to proceed at a time. */
|
||||
if (sx_try_xlock(&livedump_sx) == 0) {
|
||||
dumper_destroy(livedi);
|
||||
error = EBUSY;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* To be used by the callback functions. */
|
||||
livedi->priv = vp;
|
||||
|
||||
/* Lock the entire file range and vnode. */
|
||||
rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
|
||||
dump_savectx();
|
||||
error = minidumpsys(livedi, true);
|
||||
|
||||
VOP_UNLOCK(vp);
|
||||
vn_rangelock_unlock(vp, rl_cookie);
|
||||
sx_xunlock(&livedump_sx);
|
||||
dumper_destroy(livedi);
|
||||
drop:
|
||||
fdrop(fp, curthread);
|
||||
return (error);
|
||||
#else
|
||||
return (EOPNOTSUPP);
|
||||
#endif /* MINIDUMP_PAGE_TRACKING == 1 */
|
||||
}
|
||||
|
||||
int
|
||||
vnode_dumper_start(struct dumperinfo *di, void *key, uint32_t keysize)
|
||||
{
|
||||
|
||||
/* Always begin with an offset of zero. */
|
||||
di->dumpoff = 0;
|
||||
|
||||
KASSERT(keysize == 0, ("encryption not supported for livedumps"));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback from dumpsys() to dump a chunk of memory.
|
||||
*
|
||||
* Parameters:
|
||||
* arg Opaque private pointer to vnode
|
||||
* virtual Virtual address (where to read the data from)
|
||||
* physical Physical memory address (unused)
|
||||
* offset Offset from start of core file
|
||||
* length Data length
|
||||
*
|
||||
* Return value:
|
||||
* 0 on success
|
||||
* errno on error
|
||||
*/
|
||||
int
|
||||
vnode_dump(void *arg, void *virtual, vm_offset_t physical __unused,
|
||||
off_t offset, size_t length)
|
||||
{
|
||||
struct vnode *vp;
|
||||
int error = 0;
|
||||
|
||||
vp = arg;
|
||||
MPASS(vp != NULL);
|
||||
ASSERT_VOP_LOCKED(vp, __func__);
|
||||
|
||||
/* Done? */
|
||||
if (virtual == NULL)
|
||||
return (0);
|
||||
|
||||
error = vn_rdwr(UIO_WRITE, vp, virtual, length, offset, UIO_SYSSPACE,
|
||||
IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL, curthread);
|
||||
if (error != 0)
|
||||
uprintf("%s: error writing livedump block at offset %jx: %d\n",
|
||||
__func__, (uintmax_t)offset, error);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback from dumpsys() to write out the dump header, placed at the end.
|
||||
*/
|
||||
int
|
||||
vnode_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh)
|
||||
{
|
||||
struct vnode *vp;
|
||||
int error;
|
||||
off_t offset;
|
||||
|
||||
vp = di->priv;
|
||||
MPASS(vp != NULL);
|
||||
ASSERT_VOP_LOCKED(vp, __func__);
|
||||
|
||||
/* Compensate for compression/encryption adjustment of dumpoff. */
|
||||
offset = roundup2(di->dumpoff, di->blocksize);
|
||||
|
||||
/* Write the kernel dump header to the end of the file. */
|
||||
error = vn_rdwr(UIO_WRITE, vp, kdh, sizeof(*kdh), offset,
|
||||
UIO_SYSSPACE, IO_NODELOCKED, curthread->td_ucred, NOCRED, NULL,
|
||||
curthread);
|
||||
if (error != 0)
|
||||
uprintf("%s: error writing livedump header: %d\n", __func__,
|
||||
error);
|
||||
return (error);
|
||||
}
|
@ -362,6 +362,7 @@ struct dumperinfo {
|
||||
|
||||
extern int dumping; /* system is dumping */
|
||||
|
||||
void dump_savectx(void);
|
||||
int doadump(boolean_t);
|
||||
struct diocskerneldump_arg;
|
||||
int dumper_create(const struct dumperinfo *di_template, const char *devname,
|
||||
|
@ -162,6 +162,8 @@ void dumpsys_pb_progress(size_t);
|
||||
|
||||
extern int do_minidump;
|
||||
|
||||
int livedump_start(int, int, uint8_t);
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_KERNELDUMP_H */
|
||||
|
@ -59,6 +59,16 @@ struct mem_extract {
|
||||
|
||||
#define MEM_EXTRACT_PADDR _IOWR('m', 52, struct mem_extract)
|
||||
|
||||
/* Argument structure for the MEM_KERNELDUMP ioctl. */
struct mem_livedump_arg {
	int		fd;		/* input: file descriptor to dump to */
	int		flags;		/* input: unused, must be zero */
	uint8_t		compression;	/* input: KERNELDUMP_COMP_* value */
	uint8_t		pad1[7];	/* padding up to an 8-byte boundary */
	uint64_t	pad2[2];	/* presumably spare room -- confirm */
};
|
||||
|
||||
#define MEM_KERNELDUMP _IOW('m', 53, struct mem_livedump_arg)
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
MALLOC_DECLARE(M_MEMDESC);
|
||||
|
Loading…
Reference in New Issue
Block a user