From d3c34fc0f4733a783b8f4fc518b9cfe109b603b4 Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Mon, 9 Dec 2019 13:59:36 +0000 Subject: [PATCH] [PPC64] Initial libkvm minidump implementation This change adds PowerPC64 support for minidumps on libkvm. Address translation, page walk, and data retrieval were tested and seem to be working correctly. Reviewed by: jhibbits Differential Revision: https://reviews.freebsd.org/D21555 --- lib/libkvm/Makefile | 1 + lib/libkvm/kvm_minidump_powerpc64.c | 202 +++++++ lib/libkvm/kvm_minidump_powerpc64_hpt.c | 666 ++++++++++++++++++++++++ lib/libkvm/kvm_powerpc64.h | 81 +++ 4 files changed, 950 insertions(+) create mode 100644 lib/libkvm/kvm_minidump_powerpc64.c create mode 100644 lib/libkvm/kvm_minidump_powerpc64_hpt.c create mode 100644 lib/libkvm/kvm_powerpc64.h diff --git a/lib/libkvm/Makefile b/lib/libkvm/Makefile index affd02e06da1..11c095a2255d 100644 --- a/lib/libkvm/Makefile +++ b/lib/libkvm/Makefile @@ -19,6 +19,7 @@ SRCS= kvm.c kvm_cptime.c kvm_getloadavg.c \ kvm_minidump_mips.c \ kvm_powerpc.c kvm_powerpc64.c \ kvm_minidump_riscv.c \ + kvm_minidump_powerpc64.c kvm_minidump_powerpc64_hpt.c \ kvm_sparc64.c INCS= kvm.h diff --git a/lib/libkvm/kvm_minidump_powerpc64.c b/lib/libkvm/kvm_minidump_powerpc64.c new file mode 100644 index 000000000000..39c8d03fefa6 --- /dev/null +++ b/lib/libkvm/kvm_minidump_powerpc64.c @@ -0,0 +1,202 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * Copyright (c) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * From: FreeBSD: src/lib/libkvm/kvm_minidump_riscv.c + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include + +#include +#include +#include +#include + +#include "../../sys/powerpc/include/minidump.h" +#include "kvm_private.h" +#include "kvm_powerpc64.h" + + +static int +_powerpc64_minidump_probe(kvm_t *kd) +{ + return (_kvm_probe_elf_kernel(kd, ELFCLASS64, EM_PPC64) && + _kvm_is_minidump(kd)); +} + +static void +_powerpc64_minidump_freevtop(kvm_t *kd) +{ + struct vmstate *vm = kd->vmst; + + if (vm == NULL) + return; + if (PPC64_MMU_OPS(kd)) + PPC64_MMU_OP(kd, cleanup); + free(vm); + kd->vmst = NULL; +} + +static int +_powerpc64_minidump_initvtop(kvm_t *kd) +{ + struct vmstate *vmst; + struct minidumphdr *hdr; + off_t bitmap_off, pmap_off, sparse_off; + const char *mmu_name; + + /* Alloc VM */ + vmst = _kvm_malloc(kd, sizeof(*vmst)); + if (vmst == NULL) { + _kvm_err(kd, kd->program, "cannot allocate vm"); + return (-1); + } + hdr = &vmst->hdr; + kd->vmst = vmst; + PPC64_MMU_OPS(kd) = NULL; + /* Read minidump header */ + if (pread(kd->pmfd, hdr, sizeof(*hdr), 0) != sizeof(*hdr)) { + _kvm_err(kd, kd->program, "cannot read minidump header"); + goto failed; + } + /* Check magic */ + if (strncmp(MINIDUMP_MAGIC, hdr->magic, sizeof(hdr->magic)) != 0) { + _kvm_err(kd, kd->program, "not a minidump for this platform"); + goto failed; + } + /* Check version */ + hdr->version = be32toh(hdr->version); + if (hdr->version != MINIDUMP_VERSION) { + _kvm_err(kd, kd->program, "wrong minidump version. " + "Expected %d got %d", MINIDUMP_VERSION, hdr->version); + goto failed; + } + /* Convert header fields to host endian */ + hdr->msgbufsize = be32toh(hdr->msgbufsize); + hdr->bitmapsize = be32toh(hdr->bitmapsize); + hdr->pmapsize = be32toh(hdr->pmapsize); + hdr->kernbase = be64toh(hdr->kernbase); + hdr->kernend = be64toh(hdr->kernend); + hdr->dmapbase = be64toh(hdr->dmapbase); + hdr->dmapend = be64toh(hdr->dmapend); + hdr->hw_direct_map = be32toh(hdr->hw_direct_map); + hdr->startkernel = be64toh(hdr->startkernel); + hdr->endkernel = be64toh(hdr->endkernel); + + vmst->kimg_start = PPC64_KERNBASE; + vmst->kimg_end = PPC64_KERNBASE + hdr->endkernel - hdr->startkernel; + + /* dump header */ + dprintf("%s: mmu_name=%s,\n\t" + "msgbufsize=0x%jx, bitmapsize=0x%jx, pmapsize=0x%jx, " + "kernbase=0x%jx, kernend=0x%jx,\n\t" + "dmapbase=0x%jx, dmapend=0x%jx, hw_direct_map=%d, " + "startkernel=0x%jx, endkernel=0x%jx\n\t" + "kimg_start=0x%jx, kimg_end=0x%jx\n", + __func__, hdr->mmu_name, + (uintmax_t)hdr->msgbufsize, + (uintmax_t)hdr->bitmapsize, (uintmax_t)hdr->pmapsize, + (uintmax_t)hdr->kernbase, (uintmax_t)hdr->kernend, + (uintmax_t)hdr->dmapbase, (uintmax_t)hdr->dmapend, + hdr->hw_direct_map, hdr->startkernel, hdr->endkernel, + (uintmax_t)vmst->kimg_start, (uintmax_t)vmst->kimg_end); + + /* Detect and initialize MMU */ + mmu_name = hdr->mmu_name; + if (strcmp(mmu_name, PPC64_MMU_G5) == 0 || + strcmp(mmu_name, PPC64_MMU_PHYP) == 0) + PPC64_MMU_OPS(kd) = ppc64_mmu_ops_hpt; + else { + _kvm_err(kd, kd->program, "unsupported MMU: %s", mmu_name); + goto failed; + } + if (PPC64_MMU_OP(kd, init) == -1) + goto failed; + + /* Get dump parts' offsets */ + bitmap_off = PPC64_PAGE_SIZE + ppc64_round_page(hdr->msgbufsize); + pmap_off = bitmap_off + ppc64_round_page(hdr->bitmapsize); + sparse_off = pmap_off + ppc64_round_page(hdr->pmapsize); + + /* dump offsets */ + dprintf("%s: msgbuf_off=0x%jx, bitmap_off=0x%jx, pmap_off=0x%jx, " + "sparse_off=0x%jx\n", + __func__, (uintmax_t)PPC64_PAGE_SIZE, (uintmax_t)bitmap_off, + 
(uintmax_t)pmap_off, (uintmax_t)sparse_off); + + /* build physical address lookup table for sparse pages */ + if (_kvm_pt_init(kd, hdr->bitmapsize, bitmap_off, sparse_off, + PPC64_PAGE_SIZE, sizeof(uint64_t)) == -1) + goto failed; + + if (_kvm_pmap_init(kd, hdr->pmapsize, pmap_off) == -1) + goto failed; + return (0); + +failed: + _powerpc64_minidump_freevtop(kd); + return (-1); +} + +static int +_powerpc64_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa) +{ + if (ISALIVE(kd)) { + _kvm_err(kd, 0, "%s called in live kernel!", __func__); + return (0); + } + return (PPC64_MMU_OP(kd, kvatop, va, pa)); +} + +static int +_powerpc64_native(kvm_t *kd __unused) +{ +#ifdef __powerpc64__ + return (1); +#else + return (0); +#endif +} + +static int +_powerpc64_minidump_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg) +{ + return (PPC64_MMU_OP(kd, walk_pages, cb, arg)); +} + +static struct kvm_arch kvm_powerpc64_minidump = { + .ka_probe = _powerpc64_minidump_probe, + .ka_initvtop = _powerpc64_minidump_initvtop, + .ka_freevtop = _powerpc64_minidump_freevtop, + .ka_kvatop = _powerpc64_minidump_kvatop, + .ka_walk_pages = _powerpc64_minidump_walk_pages, + .ka_native = _powerpc64_native, +}; + +KVM_ARCH(kvm_powerpc64_minidump); diff --git a/lib/libkvm/kvm_minidump_powerpc64_hpt.c b/lib/libkvm/kvm_minidump_powerpc64_hpt.c new file mode 100644 index 000000000000..baa2fd1ebc08 --- /dev/null +++ b/lib/libkvm/kvm_minidump_powerpc64_hpt.c @@ -0,0 +1,666 @@ +/*- + * Copyright (c) 2006 Peter Wemm + * Copyright (c) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From: FreeBSD: src/lib/libkvm/kvm_minidump_riscv.c + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include "../../sys/powerpc/include/minidump.h" +#include "kvm_private.h" +#include "kvm_powerpc64.h" + +/* + * PowerPC64 HPT machine dependent routines for kvm and minidumps. 
+ * + * Address Translation parameters: + * + * b = 12 (SLB base page size: 4 KB) + * b = 24 (SLB base page size: 16 MB) + * p = 12 (page size: 4 KB) + * p = 24 (page size: 16 MB) + * s = 28 (segment size: 256 MB) + */ + +/* Large (huge) page params */ +#define LP_PAGE_SHIFT 24 +#define LP_PAGE_SIZE (1ULL << LP_PAGE_SHIFT) +#define LP_PAGE_MASK 0x00ffffffULL + +/* SLB */ + +#define SEGMENT_LENGTH 0x10000000ULL + +#define round_seg(x) roundup2((uint64_t)(x), SEGMENT_LENGTH) + +/* Virtual real-mode VSID in LPARs */ +#define VSID_VRMA 0x1ffffffULL + +#define SLBV_L 0x0000000000000100ULL /* Large page selector */ +#define SLBV_CLASS 0x0000000000000080ULL /* Class selector */ +#define SLBV_LP_MASK 0x0000000000000030ULL +#define SLBV_VSID_MASK 0x3ffffffffffff000ULL /* Virtual SegID mask */ +#define SLBV_VSID_SHIFT 12 + +#define SLBE_B_MASK 0x0000000006000000ULL +#define SLBE_B_256MB 0x0000000000000000ULL +#define SLBE_VALID 0x0000000008000000ULL /* SLB entry valid */ +#define SLBE_INDEX_MASK 0x0000000000000fffULL /* SLB index mask */ +#define SLBE_ESID_MASK 0xfffffffff0000000ULL /* Effective SegID mask */ +#define SLBE_ESID_SHIFT 28 + +/* PTE */ + +#define LPTEH_VSID_SHIFT 12 +#define LPTEH_AVPN_MASK 0xffffffffffffff80ULL +#define LPTEH_B_MASK 0xc000000000000000ULL +#define LPTEH_B_256MB 0x0000000000000000ULL +#define LPTEH_BIG 0x0000000000000004ULL /* 4KB/16MB page */ +#define LPTEH_HID 0x0000000000000002ULL +#define LPTEH_VALID 0x0000000000000001ULL + +#define LPTEL_RPGN 0xfffffffffffff000ULL +#define LPTEL_LP_MASK 0x00000000000ff000ULL +#define LPTEL_NOEXEC 0x0000000000000004ULL + +/* Supervisor (U: RW, S: RW) */ +#define LPTEL_BW 0x0000000000000002ULL + +/* Both Read Only (U: RO, S: RO) */ +#define LPTEL_BR 0x0000000000000003ULL + +#define LPTEL_RW LPTEL_BW +#define LPTEL_RO LPTEL_BR + +/* + * PTE AVA field manipulation macros. + * + * AVA[0:54] = PTEH[2:56] + * AVA[VSID] = AVA[0:49] = PTEH[2:51] + * AVA[PAGE] = AVA[50:54] = PTEH[52:56] + */ +#define PTEH_AVA_VSID_MASK 0x3ffffffffffff000UL +#define PTEH_AVA_VSID_SHIFT 12 +#define PTEH_AVA_VSID(p) \ + (((p) & PTEH_AVA_VSID_MASK) >> PTEH_AVA_VSID_SHIFT) + +#define PTEH_AVA_PAGE_MASK 0x0000000000000f80UL +#define PTEH_AVA_PAGE_SHIFT 7 +#define PTEH_AVA_PAGE(p) \ + (((p) & PTEH_AVA_PAGE_MASK) >> PTEH_AVA_PAGE_SHIFT) + +/* Masks to obtain the Physical Address from PTE low 64-bit word. */ +#define PTEL_PA_MASK 0x0ffffffffffff000UL +#define PTEL_LP_PA_MASK 0x0fffffffff000000UL + +#define PTE_HASH_MASK 0x0000007fffffffffUL + +/* + * Number of AVA/VA page bits to shift right, in order to leave only the + * ones that should be considered. 
+ * + * q = MIN(54, 77-b) (PowerISA v2.07B, 5.7.7.3) + * n = q + 1 - 50 (VSID size in bits) + * s(ava) = 5 - n + * s(va) = (28 - b) - n + * + * q: bit number of lower limit of VA/AVA bits to compare + * n: number of AVA/VA page bits to compare + * s: shift amount + * 28 - b: VA page size in bits + */ +#define AVA_PAGE_SHIFT(b) (5 - (MIN(54, 77-(b)) + 1 - 50)) +#define VA_PAGE_SHIFT(b) (28 - (b) - (MIN(54, 77-(b)) + 1 - 50)) + +/* Kernel ESID -> VSID mapping */ +#define KERNEL_VSID_BIT 0x0000001000000000UL /* Bit set in all kernel VSIDs */ +#define KERNEL_VSID(esid) ((((((uint64_t)esid << 8) | ((uint64_t)esid >> 28)) \ + * 0x13bbUL) & (KERNEL_VSID_BIT - 1)) | \ + KERNEL_VSID_BIT) + +/* Types */ + +typedef uint64_t ppc64_physaddr_t; + +typedef struct { + uint64_t slbv; + uint64_t slbe; +} ppc64_slb_entry_t; + +typedef struct { + uint64_t pte_hi; + uint64_t pte_lo; +} ppc64_pt_entry_t; + +struct hpt_data { + ppc64_slb_entry_t *slbs; + uint32_t slbsize; +}; + + +static void +slb_fill(ppc64_slb_entry_t *slb, uint64_t ea, uint64_t i) +{ + uint64_t esid; + + esid = ea >> SLBE_ESID_SHIFT; + slb->slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; + slb->slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | i; +} + +static int +slb_init(kvm_t *kd) +{ + struct minidumphdr *hdr; + struct hpt_data *data; + ppc64_slb_entry_t *slb; + uint32_t slbsize; + uint64_t ea, i, maxmem; + + hdr = &kd->vmst->hdr; + data = PPC64_MMU_DATA(kd); + + /* Alloc SLBs */ + maxmem = hdr->bitmapsize * 8 * PPC64_PAGE_SIZE; + slbsize = round_seg(hdr->kernend + 1 - hdr->kernbase + maxmem) / + SEGMENT_LENGTH * sizeof(ppc64_slb_entry_t); + data->slbs = _kvm_malloc(kd, slbsize); + if (data->slbs == NULL) { + _kvm_err(kd, kd->program, "cannot allocate slbs"); + return (-1); + } + data->slbsize = slbsize; + + dprintf("%s: maxmem=0x%jx, segs=%jd, slbsize=0x%jx\n", + __func__, (uintmax_t)maxmem, + (uintmax_t)slbsize / sizeof(ppc64_slb_entry_t), (uintmax_t)slbsize); + + /* + * Generate needed SLB entries. + * + * When translating addresses from EA to VA to PA, the needed SLB + * entry could be generated on the fly, but this is not the case + * for the walk_pages method, that needs to search the SLB entry + * by VSID, in order to find out the EA from a PTE. 
+ */ + + /* VM area */ + for (ea = hdr->kernbase, i = 0, slb = data->slbs; + ea < hdr->kernend; ea += SEGMENT_LENGTH, i++, slb++) + slb_fill(slb, ea, i); + + /* DMAP area */ + for (ea = hdr->dmapbase; + ea < MIN(hdr->dmapend, hdr->dmapbase + maxmem); + ea += SEGMENT_LENGTH, i++, slb++) { + slb_fill(slb, ea, i); + if (hdr->hw_direct_map) + slb->slbv |= SLBV_L; + } + + return (0); +} + +static void +ppc64mmu_hpt_cleanup(kvm_t *kd) +{ + struct hpt_data *data; + + if (kd->vmst == NULL) + return; + + data = PPC64_MMU_DATA(kd); + free(data->slbs); + free(data); + PPC64_MMU_DATA(kd) = NULL; +} + +static int +ppc64mmu_hpt_init(kvm_t *kd) +{ + struct hpt_data *data; + struct minidumphdr *hdr; + + hdr = &kd->vmst->hdr; + + /* Alloc MMU data */ + data = _kvm_malloc(kd, sizeof(*data)); + if (data == NULL) { + _kvm_err(kd, kd->program, "cannot allocate MMU data"); + return (-1); + } + data->slbs = NULL; + PPC64_MMU_DATA(kd) = data; + + if (slb_init(kd) == -1) + goto failed; + + return (0); + +failed: + ppc64mmu_hpt_cleanup(kd); + return (-1); +} + +static ppc64_slb_entry_t * +slb_search(kvm_t *kd, kvaddr_t ea) +{ + struct hpt_data *data; + ppc64_slb_entry_t *slb; + int i, n; + + data = PPC64_MMU_DATA(kd); + slb = data->slbs; + n = data->slbsize / sizeof(ppc64_slb_entry_t); + + /* SLB search */ + for (i = 0; i < n; i++, slb++) { + if ((slb->slbe & SLBE_VALID) == 0) + continue; + + /* Compare 36-bit ESID of EA with segment one (64-s) */ + if ((slb->slbe & SLBE_ESID_MASK) != (ea & SLBE_ESID_MASK)) + continue; + + /* Match found */ + dprintf("SEG#%02d: slbv=0x%016jx, slbe=0x%016jx\n", + i, (uintmax_t)slb->slbv, (uintmax_t)slb->slbe); + break; + } + + /* SLB not found */ + if (i == n) { + _kvm_err(kd, kd->program, "%s: segment not found for EA 0x%jx", + __func__, (uintmax_t)ea); + return (NULL); + } + return (slb); +} + +static ppc64_pt_entry_t +pte_get(kvm_t *kd, u_long ptex) +{ + ppc64_pt_entry_t pte, *p; + + p = _kvm_pmap_get(kd, ptex, sizeof(pte)); + pte.pte_hi = be64toh(p->pte_hi); + pte.pte_lo = be64toh(p->pte_lo); + return (pte); +} + +static int +pte_search(kvm_t *kd, ppc64_slb_entry_t *slb, uint64_t hid, kvaddr_t ea, + ppc64_pt_entry_t *p) +{ + uint64_t hash, hmask; + uint64_t pteg, ptex; + uint64_t va_vsid, va_page; + int b; + int ava_pg_shift, va_pg_shift; + ppc64_pt_entry_t pte; + + /* + * Get VA: + * + * va(78) = va_vsid(50) || va_page(s-b) || offset(b) + * + * va_vsid: 50-bit VSID (78-s) + * va_page: (s-b)-bit VA page + */ + b = slb->slbv & SLBV_L? 
LP_PAGE_SHIFT : PPC64_PAGE_SHIFT; + va_vsid = (slb->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT; + va_page = (ea & ~SLBE_ESID_MASK) >> b; + + dprintf("%s: hid=0x%jx, ea=0x%016jx, b=%d, va_vsid=0x%010jx, " + "va_page=0x%04jx\n", + __func__, (uintmax_t)hid, (uintmax_t)ea, b, + (uintmax_t)va_vsid, (uintmax_t)va_page); + + /* + * Get hash: + * + * Primary hash: va_vsid(11:49) ^ va_page(s-b) + * Secondary hash: ~primary_hash + */ + hash = (va_vsid & PTE_HASH_MASK) ^ va_page; + if (hid) + hash = ~hash & PTE_HASH_MASK; + + /* + * Get PTEG: + * + * pteg = (hash(0:38) & hmask) << 3 + * + * hmask (hash mask): mask generated from HTABSIZE || 11*0b1 + * hmask = number_of_ptegs - 1 + */ + hmask = kd->vmst->hdr.pmapsize / (8 * sizeof(ppc64_pt_entry_t)) - 1; + pteg = (hash & hmask) << 3; + + ava_pg_shift = AVA_PAGE_SHIFT(b); + va_pg_shift = VA_PAGE_SHIFT(b); + + dprintf("%s: hash=0x%010jx, hmask=0x%010jx, (hash & hmask)=0x%010jx, " + "pteg=0x%011jx, ava_pg_shift=%d, va_pg_shift=%d\n", + __func__, (uintmax_t)hash, (uintmax_t)hmask, + (uintmax_t)(hash & hmask), (uintmax_t)pteg, + ava_pg_shift, va_pg_shift); + + /* Search PTEG */ + for (ptex = pteg; ptex < pteg + 8; ptex++) { + pte = pte_get(kd, ptex); + + /* Check H, V and B */ + if ((pte.pte_hi & LPTEH_HID) != hid || + (pte.pte_hi & LPTEH_VALID) == 0 || + (pte.pte_hi & LPTEH_B_MASK) != LPTEH_B_256MB) + continue; + + /* Compare AVA with VA */ + if (PTEH_AVA_VSID(pte.pte_hi) != va_vsid || + (PTEH_AVA_PAGE(pte.pte_hi) >> ava_pg_shift) != + (va_page >> va_pg_shift)) + continue; + + /* + * Check if PTE[L] matches SLBV[L]. + * + * Note: this check ignores PTE[LP], as does the kernel. + */ + if (b == PPC64_PAGE_SHIFT) { + if (pte.pte_hi & LPTEH_BIG) + continue; + } else if ((pte.pte_hi & LPTEH_BIG) == 0) + continue; + + /* Match found */ + dprintf("%s: PTE found: ptex=0x%jx, pteh=0x%016jx, " + "ptel=0x%016jx\n", + __func__, (uintmax_t)ptex, (uintmax_t)pte.pte_hi, + (uintmax_t)pte.pte_lo); + break; + } + + /* Not found? */ + if (ptex == pteg + 8) { + /* Try secondary hash */ + if (hid == 0) + return (pte_search(kd, slb, LPTEH_HID, ea, p)); + else { + _kvm_err(kd, kd->program, + "%s: pte not found", __func__); + return (-1); + } + } + + /* PTE found */ + *p = pte; + return (0); +} + +static int +pte_lookup(kvm_t *kd, kvaddr_t ea, ppc64_pt_entry_t *pte) +{ + ppc64_slb_entry_t *slb; + + /* First, find SLB */ + if ((slb = slb_search(kd, ea)) == NULL) + return (-1); + + /* Next, find PTE */ + return (pte_search(kd, slb, 0, ea, pte)); +} + +static int +ppc64mmu_hpt_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa) +{ + struct minidumphdr *hdr; + struct vmstate *vm; + ppc64_pt_entry_t pte; + ppc64_physaddr_t pgoff, pgpa; + off_t ptoff; + int err; + + vm = kd->vmst; + hdr = &vm->hdr; + pgoff = va & PPC64_PAGE_MASK; + + dprintf("%s: va=0x%016jx\n", __func__, (uintmax_t)va); + + /* + * A common use case of libkvm is to first find a symbol address + * from the kernel image and then use kvatop to translate it and + * to be able to fetch its corresponding data. + * + * The problem is that, in PowerPC64 case, the addresses of relocated + * data won't match those in the kernel image. This is handled here by + * adding the relocation offset to those addresses. 
+ */ + if (va < hdr->dmapbase) + va += hdr->startkernel - PPC64_KERNBASE; + + /* Handle DMAP */ + if (va >= hdr->dmapbase && va <= hdr->dmapend) { + pgpa = (va & ~hdr->dmapbase) & ~PPC64_PAGE_MASK; + ptoff = _kvm_pt_find(kd, pgpa, PPC64_PAGE_SIZE); + if (ptoff == -1) { + _kvm_err(kd, kd->program, "%s: " + "direct map address 0x%jx not in minidump", + __func__, (uintmax_t)va); + goto invalid; + } + *pa = ptoff + pgoff; + return (PPC64_PAGE_SIZE - pgoff); + /* Translate VA to PA */ + } else if (va >= hdr->kernbase) { + if ((err = pte_lookup(kd, va, &pte)) == -1) { + _kvm_err(kd, kd->program, + "%s: pte not valid", __func__); + goto invalid; + } + + if (pte.pte_hi & LPTEH_BIG) + pgpa = (pte.pte_lo & PTEL_LP_PA_MASK) | + (va & ~PPC64_PAGE_MASK & LP_PAGE_MASK); + else + pgpa = pte.pte_lo & PTEL_PA_MASK; + dprintf("%s: pgpa=0x%016jx\n", __func__, (uintmax_t)pgpa); + + ptoff = _kvm_pt_find(kd, pgpa, PPC64_PAGE_SIZE); + if (ptoff == -1) { + _kvm_err(kd, kd->program, "%s: " + "physical address 0x%jx not in minidump", + __func__, (uintmax_t)pgpa); + goto invalid; + } + *pa = ptoff + pgoff; + return (PPC64_PAGE_SIZE - pgoff); + } else { + _kvm_err(kd, kd->program, + "%s: virtual address 0x%jx not minidumped", + __func__, (uintmax_t)va); + goto invalid; + } + +invalid: + _kvm_err(kd, 0, "invalid address (0x%jx)", (uintmax_t)va); + return (0); +} + +static vm_prot_t +entry_to_prot(ppc64_pt_entry_t *pte) +{ + vm_prot_t prot = VM_PROT_READ; + + if (pte->pte_lo & LPTEL_RW) + prot |= VM_PROT_WRITE; + if ((pte->pte_lo & LPTEL_NOEXEC) != 0) + prot |= VM_PROT_EXECUTE; + return (prot); +} + +static ppc64_slb_entry_t * +slb_vsid_search(kvm_t *kd, uint64_t vsid) +{ + struct hpt_data *data; + ppc64_slb_entry_t *slb; + int i, n; + + data = PPC64_MMU_DATA(kd); + slb = data->slbs; + n = data->slbsize / sizeof(ppc64_slb_entry_t); + vsid <<= SLBV_VSID_SHIFT; + + /* SLB search */ + for (i = 0; i < n; i++, slb++) { + /* Check if valid and compare VSID */ + if ((slb->slbe & SLBE_VALID) && + (slb->slbv & SLBV_VSID_MASK) == vsid) + break; + } + + /* SLB not found */ + if (i == n) { + _kvm_err(kd, kd->program, + "%s: segment not found for VSID 0x%jx", + __func__, (uintmax_t)vsid >> SLBV_VSID_SHIFT); + return (NULL); + } + return (slb); +} + +static u_long +get_ea(kvm_t *kd, ppc64_pt_entry_t *pte, u_long ptex) +{ + ppc64_slb_entry_t *slb; + uint64_t ea, hash, vsid; + int b, shift; + + /* Find SLB */ + vsid = PTEH_AVA_VSID(pte->pte_hi); + if ((slb = slb_vsid_search(kd, vsid)) == NULL) + return (~0UL); + + /* Get ESID part of EA */ + ea = slb->slbe & SLBE_ESID_MASK; + + b = slb->slbv & SLBV_L? LP_PAGE_SHIFT : PPC64_PAGE_SHIFT; + + /* + * If there are less than 64K PTEGs (16-bit), the upper bits of + * EA page must be obtained from PTEH's AVA. + */ + if (kd->vmst->hdr.pmapsize / (8 * sizeof(ppc64_pt_entry_t)) < + 0x10000U) { + /* + * Add 0 to 5 EA bits, right after VSID. + * b == 12: 5 bits + * b == 24: 4 bits + */ + shift = AVA_PAGE_SHIFT(b); + ea |= (PTEH_AVA_PAGE(pte->pte_hi) >> shift) << + (SLBE_ESID_SHIFT - 5 + shift); + } + + /* Get VA page from hash and add to EA. 
*/ + hash = (ptex & ~7) >> 3; + if (pte->pte_hi & LPTEH_HID) + hash = ~hash & PTE_HASH_MASK; + ea |= ((hash ^ (vsid & PTE_HASH_MASK)) << b) & ~SLBE_ESID_MASK; + return (ea); +} + +static int +ppc64mmu_hpt_walk_pages(kvm_t *kd, kvm_walk_pages_cb_t *cb, void *arg) +{ + struct vmstate *vm; + int ret; + unsigned int pagesz; + u_long dva, pa, va; + u_long ptex, nptes; + uint64_t vsid; + + ret = 0; + vm = kd->vmst; + nptes = vm->hdr.pmapsize / sizeof(ppc64_pt_entry_t); + + /* Walk through PTEs */ + for (ptex = 0; ptex < nptes; ptex++) { + ppc64_pt_entry_t pte = pte_get(kd, ptex); + if ((pte.pte_hi & LPTEH_VALID) == 0) + continue; + + /* Skip non-kernel related pages, as well as VRMA ones */ + vsid = PTEH_AVA_VSID(pte.pte_hi); + if ((vsid & KERNEL_VSID_BIT) == 0 || + (vsid >> PPC64_PAGE_SHIFT) == VSID_VRMA) + continue; + + /* Retrieve page's VA (EA on PPC64 terminology) */ + if ((va = get_ea(kd, &pte, ptex)) == ~0UL) + goto out; + + /* Get PA and page size */ + if (pte.pte_hi & LPTEH_BIG) { + pa = pte.pte_lo & PTEL_LP_PA_MASK; + pagesz = LP_PAGE_SIZE; + } else { + pa = pte.pte_lo & PTEL_PA_MASK; + pagesz = PPC64_PAGE_SIZE; + } + + /* Get DMAP address */ + dva = vm->hdr.dmapbase + pa; + + if (!_kvm_visit_cb(kd, cb, arg, pa, va, dva, + entry_to_prot(&pte), pagesz, 0)) + goto out; + } + ret = 1; + +out: + return (ret); +} + + +static struct ppc64_mmu_ops ops = { + .init = ppc64mmu_hpt_init, + .cleanup = ppc64mmu_hpt_cleanup, + .kvatop = ppc64mmu_hpt_kvatop, + .walk_pages = ppc64mmu_hpt_walk_pages, +}; +struct ppc64_mmu_ops *ppc64_mmu_ops_hpt = &ops; diff --git a/lib/libkvm/kvm_powerpc64.h b/lib/libkvm/kvm_powerpc64.h new file mode 100644 index 000000000000..21734152bdd3 --- /dev/null +++ b/lib/libkvm/kvm_powerpc64.h @@ -0,0 +1,81 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 Leandro Lupori + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __KVM_POWERPC64_H__ +#define __KVM_POWERPC64_H__ + +/* Debug stuff */ +#define KVM_PPC64_DBG 0 +#if KVM_PPC64_DBG +#include +#define dprintf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else +#define dprintf(fmt, ...) 
+#endif + + +#define PPC64_KERNBASE 0x100100ULL + +/* Page params */ +#define PPC64_PAGE_SHIFT 12 +#define PPC64_PAGE_SIZE (1ULL << PPC64_PAGE_SHIFT) +#define PPC64_PAGE_MASK (PPC64_PAGE_SIZE - 1) + +#define ppc64_round_page(x) roundup2((kvaddr_t)(x), PPC64_PAGE_SIZE) + +#define PPC64_MMU_G5 "mmu_g5" +#define PPC64_MMU_PHYP "mmu_phyp" + +/* MMU interface */ +#define PPC64_MMU_OPS(kd) (kd)->vmst->mmu.ops +#define PPC64_MMU_OP(kd, op, ...) PPC64_MMU_OPS(kd)->op((kd), ## __VA_ARGS__) +#define PPC64_MMU_DATA(kd) (kd)->vmst->mmu.data + +struct ppc64_mmu_ops { + int (*init)(kvm_t *); + void (*cleanup)(kvm_t *); + int (*kvatop)(kvm_t *, kvaddr_t, off_t *); + int (*walk_pages)(kvm_t *, kvm_walk_pages_cb_t *, void *); +}; + +struct ppc64_mmu { + struct ppc64_mmu_ops *ops; + void *data; +}; + +struct vmstate { + struct minidumphdr hdr; + uint64_t kimg_start; + uint64_t kimg_end; + struct ppc64_mmu mmu; +}; + +extern struct ppc64_mmu_ops *ppc64_mmu_ops_hpt; + +#endif /* !__KVM_POWERPC64_H__ */
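
For reference, below is a minimal usage sketch (not part of the patch) of how the new code path is typically exercised from userland through the public libkvm(3) interface: kvm_open2() on a saved vmcore triggers the ka_probe/ka_initvtop hooks added above, and kvm_read2() ends up in the new kvatop translation. The kernel/vmcore paths and the choice of the "physmem" symbol are only illustrative; build with -lkvm.

/*
 * Minimal sketch: open a PowerPC64 minidump, resolve a kernel symbol
 * and read its value.  Paths and the symbol name are placeholders.
 */
#include <sys/types.h>
#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	char errbuf[_POSIX2_LINE_MAX];
	struct kvm_nlist nl[] = {
		{ .n_name = "physmem" },	/* u_long in the kernel */
		{ .n_name = NULL }
	};
	uint64_t val;
	kvm_t *kd;

	/* Open a saved minidump; the PPC64 probe/initvtop hooks run here. */
	kd = kvm_open2("/boot/kernel/kernel", "/var/crash/vmcore.last",
	    O_RDONLY, errbuf, NULL);
	if (kd == NULL) {
		fprintf(stderr, "kvm_open2: %s\n", errbuf);
		return (1);
	}

	/*
	 * Resolve the symbol and read it; kvm_read2() translates the
	 * virtual address via the arch kvatop method (HPT walk above).
	 */
	if (kvm_nlist2(kd, nl) == 0 && nl[0].n_value != 0 &&
	    kvm_read2(kd, nl[0].n_value, &val, sizeof(val)) ==
	    (ssize_t)sizeof(val))
		/* Raw bytes, big-endian as stored in the dump. */
		printf("physmem at 0x%jx, raw value 0x%jx\n",
		    (uintmax_t)nl[0].n_value, (uintmax_t)val);

	kvm_close(kd);
	return (0);
}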