freebsd-skq/sys/kern/kern_dump.c
cem 98188ed5c2 Add 4Kn kernel dump support
(And 4Kn minidump support, but only for amd64.)

Make sure all I/O to the dump device is of the native sector size.  To
that end, we keep a native sector sized buffer associated with dump
devices (di->blockbuf) and use it to pad smaller objects as needed (e.g.
kerneldumpheader).

Add dump_write_pad() as a convenience API to dump smaller objects with
zero padding.  (Rather than pull in NPM leftpad, we wrote our own.)

Savecore(1) has been updated to deal with these dumps.  The format for
512-byte sector dumps should remain backwards compatible.

Minidumps for other architectures are left as an exercise for the
reader.

PR:		194279
Submitted by:	ambrisko@
Reviewed by:	cem (earlier version), rpokala
Tested by:	rpokala (4Kn/512 except 512 fulldump), cem (512 fulldump)
Relnotes:	yes
Sponsored by:	EMC / Isilon Storage Division
Differential Revision:	https://reviews.freebsd.org/D5848
2016-04-15 17:45:12 +00:00

398 lines
9.1 KiB
C

/*-
* Copyright (c) 2002 Marcel Moolenaar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_watchdog.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cons.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kerneldump.h>
#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <machine/dump.h>
#include <machine/elf.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
CTASSERT(sizeof(struct kerneldumpheader) == 512);
/*
* Don't touch the first SIZEOF_METADATA bytes on the dump device. This
* is to protect us from metadata and to protect metadata from us.
*/
#define SIZEOF_METADATA (64*1024)
#define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE)
off_t dumplo;
/* Handle buffered writes. */
static size_t fragsz;
struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
#if !defined(__powerpc__) && !defined(__sparc__)
void
dumpsys_gen_pa_init(void)
{
int n, idx;
bzero(dump_map, sizeof(dump_map));
for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) {
idx = n * 2;
if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
break;
dump_map[n].pa_start = dump_avail[idx];
dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
}
}
#endif
struct dump_pa *
dumpsys_gen_pa_next(struct dump_pa *mdp)
{
if (mdp == NULL)
return (&dump_map[0]);
mdp++;
if (mdp->pa_size == 0)
mdp = NULL;
return (mdp);
}
void
dumpsys_gen_wbinv_all(void)
{
}
void
dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
void *va __unused)
{
}
#if !defined(__sparc__)
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{
return (0);
}
#endif
int
dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
{
size_t len;
int error;
while (sz) {
len = di->blocksize - fragsz;
if (len > sz)
len = sz;
memcpy((char *)di->blockbuf + fragsz, ptr, len);
fragsz += len;
ptr += len;
sz -= len;
if (fragsz == di->blocksize) {
error = dump_write(di, di->blockbuf, 0, dumplo,
di->blocksize);
if (error)
return (error);
dumplo += di->blocksize;
fragsz = 0;
}
}
return (0);
}
int
dumpsys_buf_flush(struct dumperinfo *di)
{
int error;
if (fragsz == 0)
return (0);
error = dump_write(di, di->blockbuf, 0, dumplo, di->blocksize);
dumplo += di->blocksize;
fragsz = 0;
return (error);
}
CTASSERT(PAGE_SHIFT < 20);
#define PG2MB(pgs) ((pgs + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
int
dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
{
struct dumperinfo *di = (struct dumperinfo*)arg;
vm_paddr_t pa;
void *va;
uint64_t pgs;
size_t counter, sz, chunk;
int c, error;
u_int maxdumppgs;
error = 0; /* catch case in which chunk size is 0 */
counter = 0; /* Update twiddle every 16MB */
va = NULL;
pgs = mdp->pa_size / PAGE_SIZE;
pa = mdp->pa_start;
maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
if (maxdumppgs == 0) /* seatbelt */
maxdumppgs = 1;
printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
(uintmax_t)pgs);
dumpsys_wbinv_all();
while (pgs) {
chunk = pgs;
if (chunk > maxdumppgs)
chunk = maxdumppgs;
sz = chunk << PAGE_SHIFT;
counter += sz;
if (counter >> 24) {
printf(" %ju", (uintmax_t)PG2MB(pgs));
counter &= (1 << 24) - 1;
}
dumpsys_map_chunk(pa, chunk, &va);
#ifdef SW_WATCHDOG
wdog_kern_pat(WD_LASTVAL);
#endif
error = dump_write(di, va, 0, dumplo, sz);
dumpsys_unmap_chunk(pa, chunk, va);
if (error)
break;
dumplo += sz;
pgs -= chunk;
pa += sz;
/* Check for user abort. */
c = cncheckc();
if (c == 0x03)
return (ECANCELED);
if (c != -1)
printf(" (CTRL-C to abort) ");
}
printf(" ... %s\n", (error) ? "fail" : "ok");
return (error);
}
int
dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
{
struct dump_pa *mdp;
int error, seqnr;
seqnr = 0;
mdp = dumpsys_pa_next(NULL);
while (mdp != NULL) {
error = (*cb)(mdp, seqnr++, arg);
if (error)
return (-error);
mdp = dumpsys_pa_next(mdp);
}
return (seqnr);
}
#if !defined(__sparc__)
static off_t fileofs;
static int
cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
{
struct dumperinfo *di = (struct dumperinfo*)arg;
Elf_Phdr phdr;
uint64_t size;
int error;
size = mdp->pa_size;
bzero(&phdr, sizeof(phdr));
phdr.p_type = PT_LOAD;
phdr.p_flags = PF_R; /* XXX */
phdr.p_offset = fileofs;
#ifdef __powerpc__
phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
#else
phdr.p_vaddr = mdp->pa_start;
phdr.p_paddr = mdp->pa_start;
#endif
phdr.p_filesz = size;
phdr.p_memsz = size;
phdr.p_align = PAGE_SIZE;
error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
fileofs += phdr.p_filesz;
return (error);
}
static int
cb_size(struct dump_pa *mdp, int seqnr, void *arg)
{
uint64_t *sz;
sz = (uint64_t *)arg;
*sz += (uint64_t)mdp->pa_size;
return (0);
}
int
dumpsys_generic(struct dumperinfo *di)
{
static struct kerneldumpheader kdh;
Elf_Ehdr ehdr;
uint64_t dumpsize;
off_t hdrgap;
size_t hdrsz, size;
int error;
#ifndef __powerpc__
if (do_minidump)
return (minidumpsys(di));
#endif
bzero(&ehdr, sizeof(ehdr));
ehdr.e_ident[EI_MAG0] = ELFMAG0;
ehdr.e_ident[EI_MAG1] = ELFMAG1;
ehdr.e_ident[EI_MAG2] = ELFMAG2;
ehdr.e_ident[EI_MAG3] = ELFMAG3;
ehdr.e_ident[EI_CLASS] = ELF_CLASS;
#if BYTE_ORDER == LITTLE_ENDIAN
ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
#else
ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
#endif
ehdr.e_ident[EI_VERSION] = EV_CURRENT;
ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */
ehdr.e_type = ET_CORE;
ehdr.e_machine = EM_VALUE;
ehdr.e_phoff = sizeof(ehdr);
ehdr.e_flags = 0;
ehdr.e_ehsize = sizeof(ehdr);
ehdr.e_phentsize = sizeof(Elf_Phdr);
ehdr.e_shentsize = sizeof(Elf_Shdr);
dumpsys_pa_init();
/* Calculate dump size. */
dumpsize = 0L;
ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
DUMPSYS_NUM_AUX_HDRS;
hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
fileofs = MD_ALIGN(hdrsz);
dumpsize += fileofs;
hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
/* Determine dump offset on device. */
if (di->mediasize < SIZEOF_METADATA + dumpsize + di->blocksize * 2) {
error = ENOSPC;
goto fail;
}
dumplo = di->mediaoffset + di->mediasize - dumpsize;
dumplo -= di->blocksize * 2;
mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION, dumpsize,
di->blocksize);
printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
/* Dump leader */
error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size);
if (error)
goto fail;
dumplo += size;
/* Dump ELF header */
error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
if (error)
goto fail;
/* Dump program headers */
error = dumpsys_foreach_chunk(cb_dumphdr, di);
if (error < 0)
goto fail;
error = dumpsys_write_aux_headers(di);
if (error < 0)
goto fail;
dumpsys_buf_flush(di);
/*
* All headers are written using blocked I/O, so we know the
* current offset is (still) block aligned. Skip the alignement
* in the file to have the segment contents aligned at page
* boundary. We cannot use MD_ALIGN on dumplo, because we don't
* care and may very well be unaligned within the dump device.
*/
dumplo += hdrgap;
/* Dump memory chunks (updates dumplo) */
error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
if (error < 0)
goto fail;
/* Dump trailer */
error = dump_write_pad(di, &kdh, 0, dumplo, sizeof(kdh), &size);
if (error)
goto fail;
/* Signal completion, signoff and exit stage left. */
dump_write(di, NULL, 0, 0, 0);
printf("\nDump complete\n");
return (0);
fail:
if (error < 0)
error = -error;
if (error == ECANCELED)
printf("\nDump aborted\n");
else if (error == ENOSPC)
printf("\nDump failed. Partition too small.\n");
else
printf("\n** DUMP FAILED (ERROR %d) **\n", error);
return (error);
}
#endif