Introduce minidumps. Full physical memory crash dumps are still available

via the debug.minidump sysctl and tunable.

Traditional dumps store all physical memory.  This was once a good thing
when machines had a maximum of 64M of ram and 1GB of kvm.  These days,
machines often have many gigabytes of ram and a smaller amount of kvm.
libkvm+kgdb don't have a way to access physical ram that is not mapped
into kvm at the time of the crash dump, so the extra ram being dumped
is mostly wasted.

Minidumps invert the process.  Instead of dumping physical memory in
in order to guarantee that all of kvm's backing is dumped, minidumps
instead dump only memory that is actively mapped into kvm.

amd64 has a direct map region that things like UMA use.  Obviously we
cannot dump all of the direct map region because that is effectively
an old style all-physical-memory dump.  Instead, introduce a bitmap
and two helper routines (dump_add_page(pa) and dump_drop_page(pa)) that
allow certain critical direct map pages to be included in the dump.
uma_machdep.c's allocator is the intended consumer.

Dumps are a custom format.  At the very beginning of the file is a header,
then a copy of the message buffer, then the bitmap of pages present in
the dump, then the final level of the kvm page table trees (2MB mappings
are expanded into a 4K page mappings), then the sparse physical pages
according to the bitmap.  libkvm can now conveniently access the kvm
page table entries.

Booting my test 8GB machine, forcing it into ddb and forcing a dump
leads to a 48MB minidump.  While this is a best case, I expect minidumps
to be in the 100MB-500MB range.  Obviously, never larger than physical
memory of course.

minidumps are on by default.  It would want be necessary to turn them off
if it was necessary to debug corrupt kernel page table management as that
would mess up minidumps as well.

Both minidumps and regular dumps are supported on the same machine.
This commit is contained in:
Peter Wemm 2006-04-21 04:24:50 +00:00
parent 4f00efe0cb
commit c0345a84aa
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=157908
8 changed files with 513 additions and 3 deletions

View File

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <vm/vm.h>
@ -40,6 +41,11 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct kerneldumpheader) == 512);
int do_minidump = 1;
TUNABLE_INT("debug.minidump", &do_minidump);
SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
"Enable mini crash dumps");
/*
* Don't touch the first SIZEOF_METADATA bytes on the dump device. This
* is to protect us from metadata and to protect metadata from us.
@ -272,6 +278,10 @@ dumpsys(struct dumperinfo *di)
size_t hdrsz;
int error;
if (do_minidump) {
minidumpsys(di);
return;
}
bzero(&ehdr, sizeof(ehdr));
ehdr.e_ident[EI_MAG0] = ELFMAG0;
ehdr.e_ident[EI_MAG1] = ELFMAG1;

View File

@ -0,0 +1,420 @@
/*-
* Copyright (c) 2006 Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/msgbuf.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/vmparam.h>
#include <machine/minidump.h>
CTASSERT(sizeof(struct kerneldumpheader) == 512);
/*
* Don't touch the first SIZEOF_METADATA bytes on the dump device. This
* is to protect us from metadata and to protect metadata from us.
*/
#define SIZEOF_METADATA (64*1024)
#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
extern uint64_t KPDPphys;
uint64_t *vm_page_dump;
int vm_page_dump_size;
static struct kerneldumpheader kdh;
static off_t dumplo;
/* Handle chunked writes. */
static size_t fragsz;
static void *dump_va;
static size_t counter, progress;
CTASSERT(sizeof(*vm_page_dump) == 8);
static int
is_dumpable(vm_paddr_t pa)
{
int i;
for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) {
if (pa >= dump_avail[i] && pa < dump_avail[i + 1])
return (1);
}
return (0);
}
/* XXX should be MI */
static void
mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
uint32_t blksz)
{
bzero(kdh, sizeof(*kdh));
strncpy(kdh->magic, KERNELDUMPMAGIC, sizeof(kdh->magic));
strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
kdh->version = htod32(KERNELDUMPVERSION);
kdh->architectureversion = htod32(archver);
kdh->dumplength = htod64(dumplen);
kdh->dumptime = htod64(time_second);
kdh->blocksize = htod32(blksz);
strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
if (panicstr != NULL)
strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
kdh->parity = kerneldump_parity(kdh);
}
#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8)
static int
blk_flush(struct dumperinfo *di)
{
int error;
if (fragsz == 0)
return (0);
error = di->dumper(di->priv, dump_va, 0, dumplo, fragsz);
dumplo += fragsz;
fragsz = 0;
return (error);
}
static int
blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
{
size_t len;
int error, i, c;
error = 0;
if ((sz % PAGE_SIZE) != 0) {
printf("size not page aligned\n");
return (EINVAL);
}
if (ptr != NULL && pa != 0) {
printf("cant have both va and pa!\n");
return (EINVAL);
}
if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) {
printf("address not page aligned\n");
return (EINVAL);
}
if (ptr != NULL) {
/* If we're doing a virtual dump, flush any pre-existing pa pages */
error = blk_flush(di);
if (error)
return (error);
}
while (sz) {
len = (MAXDUMPPGS * PAGE_SIZE) - fragsz;
if (len > sz)
len = sz;
counter += len;
progress -= len;
if (counter >> 24) {
printf(" %ld", PG2MB(progress >> PAGE_SHIFT));
counter &= (1<<24) - 1;
}
if (ptr) {
error = di->dumper(di->priv, ptr, 0, dumplo, len);
if (error)
return (error);
dumplo += len;
ptr += len;
sz -= len;
} else {
for (i = 0; i < len; i += PAGE_SIZE)
dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT);
fragsz += len;
pa += len;
sz -= len;
if (fragsz == (MAXDUMPPGS * PAGE_SIZE)) {
error = blk_flush(di);
if (error)
return (error);
}
}
/* Check for user abort. */
c = cncheckc();
if (c == 0x03)
return (ECANCELED);
if (c != -1)
printf(" (CTRL-C to abort) ");
}
return (0);
}
/* A fake page table page, to avoid having to handle both 4K and 2M pages */
static pt_entry_t fakept[NPTEPG];
void
minidumpsys(struct dumperinfo *di)
{
uint64_t dumpsize;
uint32_t ptesize;
vm_offset_t va;
int error;
uint64_t bits;
uint64_t *pdp, *pd, *pt, pa;
int i, j, k, bit;
struct minidumphdr mdhdr;
counter = 0;
/* Walk page table pages, set bits in vm_page_dump */
ptesize = 0;
pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
/*
* We always write a page, even if it is zero. Each
* page written corresponds to 2MB of space
*/
ptesize += PAGE_SIZE;
if ((pdp[i] & PG_V) == 0)
continue;
pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
pa = pd[j] & PG_FRAME & ~PDRMASK;
for (k = 0; k < NPTEPG; k++) {
if (is_dumpable(pa))
dump_add_page(pa);
pa += PAGE_SIZE;
}
continue;
}
if ((pd[j] & PG_V) == PG_V) {
/* set bit for each valid page in this 2MB block */
pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
for (k = 0; k < NPTEPG; k++) {
if ((pt[k] & PG_V) == PG_V) {
pa = pt[k] & PG_FRAME;
if (is_dumpable(pa))
dump_add_page(pa);
}
}
} else {
/* nothing, we're going to dump a null page */
}
}
/* Calculate dump size. */
dumpsize = ptesize;
dumpsize += round_page(msgbufp->msg_size);
dumpsize += round_page(vm_page_dump_size);
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfq(bits);
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
/* Clear out undumpable pages now if needed */
if (is_dumpable(pa)) {
dumpsize += PAGE_SIZE;
} else {
dump_drop_page(pa);
}
bits &= ~(1ul << bit);
}
}
dumpsize += PAGE_SIZE;
/* Determine dump offset on device. */
if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
error = ENOSPC;
goto fail;
}
dumplo = di->mediaoffset + di->mediasize - dumpsize;
dumplo -= sizeof(kdh) * 2;
progress = dumpsize;
/* Initialize mdhdr */
bzero(&mdhdr, sizeof(mdhdr));
strcpy(mdhdr.magic, MINIDUMP_MAGIC);
mdhdr.version = MINIDUMP_VERSION;
mdhdr.msgbufsize = msgbufp->msg_size;
mdhdr.bitmapsize = vm_page_dump_size;
mdhdr.ptesize = ptesize;
mdhdr.kernbase = KERNBASE;
mdhdr.dmapbase = DMAP_MIN_ADDRESS;
mdhdr.dmapend = DMAP_MAX_ADDRESS;
mkdumpheader(&kdh, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576);
printf("Dumping %llu MB:", (long long)dumpsize >> 20);
/* Dump leader */
error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
if (error)
goto fail;
dumplo += sizeof(kdh);
/* Dump my header */
bzero(&fakept, sizeof(fakept));
bcopy(&mdhdr, &fakept, sizeof(mdhdr));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* Dump msgbuf up front */
error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size));
if (error)
goto fail;
/* Dump bitmap */
error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size));
if (error)
goto fail;
/* Dump kernel page table pages */
pdp = (uint64_t *)PHYS_TO_DMAP(KPDPphys);
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
/* We always write a page, even if it is zero */
if ((pdp[i] & PG_V) == 0) {
bzero(fakept, sizeof(fakept));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
continue;
}
pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME);
j = ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is a single 2M block. Generate a fake PTP */
pa = pd[j] & PG_FRAME & ~PDRMASK;
for (k = 0; k < NPTEPG; k++) {
fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M;
}
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
continue;
}
if ((pd[j] & PG_V) == PG_V) {
pt = (uint64_t *)PHYS_TO_DMAP(pd[j] & PG_FRAME);
error = blk_write(di, (char *)pt, 0, PAGE_SIZE);
if (error)
goto fail;
} else {
bzero(fakept, sizeof(fakept));
error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE);
if (error)
goto fail;
/* flush, in case we reuse fakept in the same block */
error = blk_flush(di);
if (error)
goto fail;
}
}
/* Dump memory chunks */
/* XXX cluster it up and use blk_dump() */
for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
bits = vm_page_dump[i];
while (bits) {
bit = bsfq(bits);
pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE;
error = blk_write(di, 0, pa, PAGE_SIZE);
if (error)
goto fail;
bits &= ~(1ul << bit);
}
}
error = blk_flush(di);
if (error)
goto fail;
/* Dump trailer */
error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
if (error)
goto fail;
dumplo += sizeof(kdh);
/* Signal completion, signoff and exit stage left. */
di->dumper(di->priv, NULL, 0, 0, 0);
printf("\nDump complete\n");
return;
fail:
if (error < 0)
error = -error;
if (error == ECANCELED)
printf("\nDump aborted\n");
else if (error == ENOSPC)
printf("\nDump failed. Partition too small.\n");
else
printf("\n** DUMP FAILED (ERROR %d) **\n", error);
}
void
dump_add_page(vm_paddr_t pa)
{
int idx, bit;
pa >>= PAGE_SHIFT;
idx = pa >> 6; /* 2^6 = 64 */
bit = pa & 63;
atomic_set_long(&vm_page_dump[idx], 1ul << bit);
}
void
dump_drop_page(vm_paddr_t pa)
{
int idx, bit;
pa >>= PAGE_SHIFT;
idx = pa >> 6; /* 2^6 = 64 */
bit = pa & 63;
atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
}

View File

@ -181,7 +181,7 @@ pt_entry_t pg_nx;
static u_int64_t KPTphys; /* phys addr of kernel level 1 */
static u_int64_t KPDphys; /* phys addr of kernel level 2 */
static u_int64_t KPDPphys; /* phys addr of kernel level 3 */
u_int64_t KPDPphys; /* phys addr of kernel level 3 */
u_int64_t KPML4phys; /* phys addr of kernel level 4 */
static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */

View File

@ -44,6 +44,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
static vm_pindex_t colour;
vm_page_t m;
vm_paddr_t pa;
void *va;
int pflags;
@ -64,7 +65,9 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
} else
break;
}
va = (void *)PHYS_TO_DMAP(m->phys_addr);
pa = m->phys_addr;
dump_add_page(pa);
va = (void *)PHYS_TO_DMAP(pa);
if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
pagezero(va);
return (va);
@ -74,8 +77,11 @@ void
uma_small_free(void *mem, int size, u_int8_t flags)
{
vm_page_t m;
vm_paddr_t pa;
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
pa = DMAP_TO_PHYS((vm_offset_t)mem);
dump_drop_page(pa);
m = PHYS_TO_VM_PAGE(pa);
vm_page_lock_queues();
vm_page_free(m);
vm_page_unlock_queues();

View File

@ -53,6 +53,8 @@ extern char cpu_vendor[];
extern char kstack[];
extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
extern struct pcpu __pcpu[];
@ -61,11 +63,14 @@ struct thread;
struct reg;
struct fpreg;
struct dbreg;
struct dumperinfo;
void busdma_swi(void);
void cpu_setregs(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
void initializecpu(void);
void fillw(int /*u_short*/ pat, void *base, size_t cnt);
void fpstate_drop(struct thread *td);
@ -75,5 +80,6 @@ void pagecopy(void *from, void *to);
void pagezero(void *addr);
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist);
int user_dbreg_trap(void);
void minidumpsys(struct dumperinfo *);
#endif /* !_MACHINE_MD_VAR_H_ */

View File

@ -0,0 +1,46 @@
/*-
* Copyright (c) 2006 Peter Wemm
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_MINIDUMP_H_
#define _MACHINE_MINIDUMP_H_ 1
#define MINIDUMP_MAGIC "minidump FreeBSD/amd64"
#define MINIDUMP_VERSION 1
struct minidumphdr {
char magic[24];
uint32_t version;
uint32_t msgbufsize;
uint32_t bitmapsize;
uint32_t ptesize;
uint64_t kernbase;
uint64_t dmapbase;
uint64_t dmapend;
};
#endif /* _MACHINE_MINIDUMP_H_ */

View File

@ -104,6 +104,7 @@ amd64/amd64/local_apic.c standard
amd64/amd64/locore.S standard no-obj
amd64/amd64/machdep.c standard
amd64/amd64/mem.c optional mem
amd64/amd64/minidump_machdep.c standard
amd64/amd64/mp_machdep.c optional smp
amd64/amd64/mp_watchdog.c optional mp_watchdog smp
amd64/amd64/mpboot.S optional smp

View File

@ -121,6 +121,8 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <machine/md_var.h>
/*
* Associated with page of user-allocatable memory is a
* page structure.
@ -232,6 +234,25 @@ vm_page_startup(vm_offset_t vaddr)
bzero((void *)mapped, end - new_end);
uma_startup((void *)mapped, boot_pages);
#if defined(__amd64__) || defined(__i386__)
/*
* Allocate a bitmap to indicate that a random physical page
* needs to be included in a minidump.
*
* The amd64 port needs this to indicate which direct map pages
* need to be dumped, via calls to dump_add_page()/dump_drop_page().
*
* However, i386 still needs this workspace internally within the
* minidump code. In theory, they are not needed on i386, but are
* included should the sf_buf code decide to use them.
*/
page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY);
new_end -= vm_page_dump_size;
vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end,
new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE);
bzero((void *)vm_page_dump, vm_page_dump_size);
#endif
/*
* Compute the number of pages of memory that will be available for
* use (taking into account the overhead of a page structure per