freebsd-dev/sys/amd64/amd64/dump_machdep.c
Peter Wemm c0345a84aa Introduce minidumps. Full physical memory crash dumps are still available
via the debug.minidump sysctl and tunable.

Traditional dumps store all physical memory.  This was once a good thing
when machines had a maximum of 64M of ram and 1GB of kvm.  These days,
machines often have many gigabytes of ram and a smaller amount of kvm.
libkvm+kgdb don't have a way to access physical ram that is not mapped
into kvm at the time of the crash dump, so the extra ram being dumped
is mostly wasted.

Minidumps invert the process.  Instead of dumping physical memory in
order to guarantee that all of kvm's backing is dumped, minidumps
instead dump only memory that is actively mapped into kvm.

amd64 has a direct map region that things like UMA use.  Obviously we
cannot dump all of the direct map region because that is effectively
an old style all-physical-memory dump.  Instead, introduce a bitmap
and two helper routines (dump_add_page(pa) and dump_drop_page(pa)) that
allow certain critical direct map pages to be included in the dump.
uma_machdep.c's allocator is the intended consumer.

Dumps are a custom format.  At the very beginning of the file is a header,
then a copy of the message buffer, then the bitmap of pages present in
the dump, then the final level of the kvm page table trees (2MB mappings
are expanded into 4K page mappings), then the sparse physical pages
according to the bitmap.  libkvm can now conveniently access the kvm
page table entries.

Booting my test 8GB machine, forcing it into ddb and forcing a dump
leads to a 48MB minidump.  While this is a best case, I expect minidumps
to be in the 100MB-500MB range.  Obviously, never larger than physical
memory of course.

minidumps are on by default.  It would only be necessary to turn them off
if it was necessary to debug corrupt kernel page table management as that
would mess up minidumps as well.

Both minidumps and regular dumps are supported on the same machine.
2006-04-21 04:24:50 +00:00

381 lines
9.1 KiB
C

/*-
* Copyright (c) 2002 Marcel Moolenaar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/elf.h>
#include <machine/md_var.h>
/* Compile-time check: the kernel dump header must be exactly 512 bytes. */
CTASSERT(sizeof(struct kerneldumpheader) == 512);
/*
 * When non-zero, dumpsys() hands the whole job to minidumpsys() instead of
 * writing a full physical-memory dump.  Defaults to on; exposed as the
 * "debug.minidump" tunable and as a read/write sysctl of the same name.
 */
int do_minidump = 1;
TUNABLE_INT("debug.minidump", &do_minidump);
SYSCTL_INT(_debug, OID_AUTO, minidump, CTLFLAG_RW, &do_minidump, 0,
"Enable mini crash dumps");
/*
 * The first SIZEOF_METADATA bytes of the dump device are off limits to the
 * dump code.  This keeps the dump from overwriting on-disk metadata and
 * keeps that metadata from ending up inside a dump.
 */
#define	SIZEOF_METADATA	(64 * 1024)

/* Round x up to the next page boundary; the result has type off_t. */
#define	MD_ALIGN(x)	(((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)

/* Round x up to the next DEV_BSIZE boundary; the result has type off_t. */
#define	DEV_ALIGN(x)	(((off_t)(x) + (DEV_BSIZE - 1)) & ~(DEV_BSIZE - 1))
/* One contiguous range of physical memory that goes into the dump. */
struct md_pa {
vm_paddr_t md_start;	/* physical address at which the range begins */
vm_paddr_t md_size;	/* length of the range in bytes; 0 ends the table */
};
/*
 * Per-chunk callback run by foreach_chunk().  It receives the chunk, the
 * chunk's sequence number and the caller's opaque argument; returning a
 * non-zero error code stops the walk.
 */
typedef int callback_t(struct md_pa *, int, void *);
/* Dump header; dumpsys() writes it both before and after the dump image. */
static struct kerneldumpheader kdh;
/*
 * dumplo:  offset on the dump device at which the next write lands.
 * fileofs: running offset within the dump image, used to fill in p_offset
 *          of each ELF program header.
 */
static off_t dumplo, fileofs;
/* Handle buffered writes. */
static char buffer[DEV_BSIZE];
/* Number of bytes currently staged in buffer and not yet written out. */
static size_t fragsz;
/*
 * Table of physical memory ranges to dump.  md_pa_init() fills it from
 * start/end pairs in dump_avail[]: 20 entries (10 pairs) correspond to 10
 * md_pa's.  Unused slots have md_size == 0.
 */
static struct md_pa dump_map[10];

/*
 * Rebuild dump_map from dump_avail[].  Entries are consumed two at a time
 * as (start, end) pairs until either a pair of zeroes is found or dump_map
 * is full.
 */
static void
md_pa_init(void)
{
	int n, idx;

	bzero(dump_map, sizeof(dump_map));
	for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) {
		idx = n * 2;
		if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
			break;
		dump_map[n].md_start = dump_avail[idx];
		dump_map[n].md_size = dump_avail[idx + 1] - dump_avail[idx];
	}
}

/*
 * Return the first slot of dump_map.  The slot is returned unconditionally,
 * even when md_pa_init() found no ranges and its md_size is still 0.
 */
static struct md_pa *
md_pa_first(void)
{

	return (&dump_map[0]);
}

/*
 * Return the range that follows mdp in dump_map, or NULL when mdp was the
 * last one.  The walk ends at the first slot with md_size == 0 or at the
 * end of the table, whichever comes first.
 */
static struct md_pa *
md_pa_next(struct md_pa *mdp)
{
	struct md_pa *end;

	end = dump_map + sizeof(dump_map) / sizeof(dump_map[0]);
	mdp++;
	/*
	 * Check the bound before looking at the slot: when every slot of
	 * dump_map is in use there is no zero-sized terminator to stop on.
	 */
	if (mdp >= end || mdp->md_size == 0)
		mdp = NULL;
	return (mdp);
}
/* XXX should be MI */
/*
 * Build a kernel dump header in *kdh.
 *
 * kdh     - header to fill in; it is zeroed first.
 * archver - architecture-specific dump format version.
 * dumplen - length of the dump the header describes.
 * blksz   - block size to record in the header.
 *
 * Text fields are copied with strncpy() into the zeroed header, so a
 * source string at least as long as its field leaves the field without a
 * terminating NUL.  The parity word is computed last, once every other
 * field is in place.
 */
static void
mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
    uint32_t blksz)
{

	bzero(kdh, sizeof(*kdh));

	/* Text fields. */
	strncpy(kdh->magic, KERNELDUMPMAGIC, sizeof(kdh->magic));
	strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
	strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
	strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
	if (panicstr != NULL)
		strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));

	/* Numeric fields, each passed through htod32()/htod64(). */
	kdh->version = htod32(KERNELDUMPVERSION);
	kdh->architectureversion = htod32(archver);
	kdh->blocksize = htod32(blksz);
	kdh->dumplength = htod64(dumplen);
	kdh->dumptime = htod64(time_second);

	kdh->parity = kerneldump_parity(kdh);
}
static int
buf_write(struct dumperinfo *di, char *ptr, size_t sz)
{
size_t len;
int error;
while (sz) {
len = DEV_BSIZE - fragsz;
if (len > sz)
len = sz;
bcopy(ptr, buffer + fragsz, len);
fragsz += len;
ptr += len;
sz -= len;
if (fragsz == DEV_BSIZE) {
error = di->dumper(di->priv, buffer, 0, dumplo,
DEV_BSIZE);
if (error)
return error;
dumplo += DEV_BSIZE;
fragsz = 0;
}
}
return (0);
}
/*
 * Write out whatever buf_write() has left in the staging buffer.  A whole
 * DEV_BSIZE block is always written; the bytes past fragsz are whatever
 * the buffer held before.  dumplo advances by one block and the buffer is
 * marked empty whether or not the write succeeded.
 *
 * Returns 0 when there was nothing to flush, otherwise the dumper's result.
 */
static int
buf_flush(struct dumperinfo *di)
{
	int error;

	error = 0;
	if (fragsz != 0) {
		error = di->dumper(di->priv, buffer, 0, dumplo, DEV_BSIZE);
		dumplo += DEV_BSIZE;
		fragsz = 0;
	}
	return (error);
}
/*
 * Convert a page count to megabytes, rounding up.  The shift by 8 treats
 * 256 pages as one megabyte.  The argument is parenthesized so that an
 * expression containing lower-precedence operators is converted as a whole.
 */
#define	PG2MB(pgs)	(((pgs) + (1 << 8) - 1) >> 8)
/*
 * Chunk callback: write the contents of one physical memory range to the
 * dump device.
 *
 * mdp   - the range to dump.
 * seqnr - sequence number of the range; only used in the progress output.
 * arg   - the struct dumperinfo describing the dump device.
 *
 * The range is written in pieces of at most MAXDUMPPGS pages.  Each piece
 * is mapped page by page with pmap_kenter_temporary() and then handed to
 * the dumper in a single call; dumplo advances by the size of the piece.
 * The console is polled after every piece, and a CTRL-C (0x03) aborts the
 * dump.
 *
 * Returns 0 on success, the dumper's error code if a write fails, or
 * ECANCELED on user abort.
 */
static int
cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
{
	struct dumperinfo *di = (struct dumperinfo*)arg;
	vm_paddr_t a, pa;
	void *va;
	uint64_t pgs;
	size_t counter, sz, chunk;
	int i, c, error;

	error = 0;	/* catch case in which chunk size is 0 */
	counter = 0;	/* Print progress every 16MB */
	va = NULL;
	pgs = mdp->md_size / PAGE_SIZE;
	pa = mdp->md_start;

	/* pgs is a uint64_t, so print it through uintmax_t and %ju. */
	printf(" chunk %d: %juMB (%ju pages)", seqnr,
	    (uintmax_t)PG2MB(pgs), (uintmax_t)pgs);

	while (pgs) {
		chunk = pgs;
		if (chunk > MAXDUMPPGS)
			chunk = MAXDUMPPGS;
		sz = chunk << PAGE_SHIFT;
		counter += sz;
		if (counter >> 24) {
			/* Another 16MB done: show the megabytes still to go. */
			printf(" %ju", (uintmax_t)PG2MB(pgs));
			counter &= (1<<24) - 1;
		}
		/*
		 * Map every page of this piece.  The address returned by the
		 * last call is what gets passed to the dumper below.
		 * NOTE(review): presumably every call returns the base of the
		 * same temporary mapping window -- confirm against
		 * pmap_kenter_temporary().
		 */
		for (i = 0; (size_t)i < chunk; i++) {
			a = pa + i * PAGE_SIZE;
			va = pmap_kenter_temporary(trunc_page(a), i);
		}
		error = di->dumper(di->priv, va, 0, dumplo, sz);
		if (error)
			break;
		dumplo += sz;
		pgs -= chunk;
		pa += sz;

		/* Check for user abort. */
		c = cncheckc();
		if (c == 0x03)
			return (ECANCELED);
		if (c != -1)
			printf(" (CTRL-C to abort) ");
	}
	printf(" ... %s\n", (error) ? "fail" : "ok");
	return (error);
}
/*
 * Chunk callback: emit the ELF program header that describes one physical
 * memory range.
 *
 * The range becomes a readable PT_LOAD segment whose virtual and physical
 * addresses are both the range's physical start address.  The header goes
 * out through buf_write(), and fileofs then advances by the size of the
 * range so the next segment follows this one in the image.
 *
 * Returns the result of buf_write().
 */
static int
cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
{
	struct dumperinfo *di;
	Elf_Phdr ph;
	uint64_t len;
	int rv;

	di = (struct dumperinfo *)arg;
	len = mdp->md_size;

	bzero(&ph, sizeof(ph));
	ph.p_type = PT_LOAD;
	ph.p_flags = PF_R;		/* XXX */
	ph.p_align = PAGE_SIZE;
	ph.p_offset = fileofs;
	ph.p_paddr = mdp->md_start;
	ph.p_vaddr = mdp->md_start;
	ph.p_memsz = len;
	ph.p_filesz = len;

	rv = buf_write(di, (char *)&ph, sizeof(ph));
	fileofs += ph.p_filesz;
	return (rv);
}
/*
 * Chunk callback: add the size of one range to the running total that arg
 * points to (a uint64_t).  Always returns 0.
 */
static int
cb_size(struct md_pa *mdp, int seqnr, void *arg)
{
	uint64_t *total;

	total = (uint64_t *)arg;
	*total = *total + (uint64_t)mdp->md_size;
	return (0);
}
/*
 * Run cb on every range in the dump map, in order, passing it the range,
 * a sequence number that counts up from 0, and arg.
 *
 * Returns the number of ranges visited when every call succeeded.  If a
 * callback returns a non-zero error the walk stops at once and the negated
 * error is returned, so callers tell failure from success by the sign.
 */
static int
foreach_chunk(callback_t cb, void *arg)
{
	struct md_pa *cur;
	int count, rv;

	count = 0;
	for (cur = md_pa_first(); cur != NULL; cur = md_pa_next(cur)) {
		rv = cb(cur, count, arg);
		count++;
		if (rv != 0)
			return (-rv);
	}
	return (count);
}
/*
 * Write a crash dump to the dump device described by di.
 *
 * If do_minidump is set the work is delegated to minidumpsys().  Otherwise
 * a full dump of every range in the dump map is written as an ELF core
 * image placed at the very end of the device:
 *
 *	kernel dump header (leader)
 *	ELF header and one program header per range (buffered writes)
 *	gap that page-aligns the segment contents within the image
 *	contents of each range
 *	kernel dump header (trailer)
 *
 * Nothing is returned; the outcome is reported on the console.
 */
void
dumpsys(struct dumperinfo *di)
{
	Elf_Ehdr ehdr;
	uint64_t dumpsize;
	off_t hdrgap;
	size_t hdrsz;
	int error;

	if (do_minidump) {
		minidumpsys(di);
		return;
	}

	bzero(&ehdr, sizeof(ehdr));
	ehdr.e_ident[EI_MAG0] = ELFMAG0;
	ehdr.e_ident[EI_MAG1] = ELFMAG1;
	ehdr.e_ident[EI_MAG2] = ELFMAG2;
	ehdr.e_ident[EI_MAG3] = ELFMAG3;
	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
#if BYTE_ORDER == LITTLE_ENDIAN
	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
#else
	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
#endif
	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
	ehdr.e_type = ET_CORE;
	ehdr.e_machine = EM_X86_64;
	ehdr.e_phoff = sizeof(ehdr);
	ehdr.e_flags = 0;
	ehdr.e_ehsize = sizeof(ehdr);
	ehdr.e_phentsize = sizeof(Elf_Phdr);
	ehdr.e_shentsize = sizeof(Elf_Shdr);

	md_pa_init();

	/* Calculate dump size. */
	dumpsize = 0L;
	ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
	fileofs = MD_ALIGN(hdrsz);
	dumpsize += fileofs;
	hdrgap = fileofs - DEV_ALIGN(hdrsz);

	/* Determine dump offset on device. */
	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
		error = ENOSPC;
		goto fail;
	}
	dumplo = di->mediaoffset + di->mediasize - dumpsize;
	dumplo -= sizeof(kdh) * 2;

	mkdumpheader(&kdh, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);

	printf("Dumping %llu MB (%d chunks)\n",
	    (unsigned long long)dumpsize >> 20, ehdr.e_phnum);

	/* Dump leader */
	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
	if (error)
		goto fail;
	dumplo += sizeof(kdh);

	/* Dump ELF header */
	error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
	if (error)
		goto fail;

	/* Dump program headers */
	error = foreach_chunk(cb_dumphdr, di);
	if (error < 0)
		goto fail;
	/*
	 * Push out the last, partially filled header block.  A failure
	 * here would leave the headers incomplete, so it fails the dump.
	 */
	error = buf_flush(di);
	if (error)
		goto fail;

	/*
	 * All headers are written using blocked I/O, so we know the
	 * current offset is (still) block aligned. Skip the alignment
	 * in the file to have the segment contents aligned at page
	 * boundary. We cannot use MD_ALIGN on dumplo, because we don't
	 * care and may very well be unaligned within the dump device.
	 */
	dumplo += hdrgap;

	/* Dump memory chunks (updates dumplo) */
	error = foreach_chunk(cb_dumpdata, di);
	if (error < 0)
		goto fail;

	/* Dump trailer */
	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
	if (error)
		goto fail;

	/* Signal completion, signoff and exit stage left. */
	di->dumper(di->priv, NULL, 0, 0, 0);
	printf("\nDump complete\n");
	return;

fail:
	/* foreach_chunk() reports failures as negated error codes. */
	if (error < 0)
		error = -error;

	if (error == ECANCELED)
		printf("\nDump aborted\n");
	else if (error == ENOSPC)
		printf("\nDump failed. Partition too small.\n");
	else
		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
}