Add a sysctl to dump kernel mappings and their properties on amd64.

The sysctl is called vm.pmap.kernel_maps. It dumps address ranges together
with their protection and mapping mode, as well as counts of 2MB and 1GB
pages in each range.

Reviewed by:	kib
MFC after:	2 weeks
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D21380
commit 116c38c27d
parent 628e9ea4a8
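
Sample interaction (illustrative output only; the exact ranges, modes and
page counts depend on the machine and kernel configuration):

    # sysctl vm.pmap.kernel_maps
    vm.pmap.kernel_maps:
    Direct map:
    0xfffff80000000000-0xfffff80080000000 rw-sg WB 2 0 0

    Kernel map:
    0xffffffff80000000-0xffffffff80e00000 r-xsg WB 0 7 0
    0xffffffff80e00000-0xffffffff81000000 rw-sg WB 0 1 0

Each line gives an address range, its protection (writable, executable,
user/supervisor, global), the PAT caching mode, and the number of 1GB,
2MB and 4KB pages backing it.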
@@ -124,6 +124,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
#include <sys/vmem.h>
@@ -2112,6 +2113,41 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
	return (mask);
}

static int
pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde)
{
	int pat_flag, pat_idx;

	pat_idx = 0;
	switch (pmap->pm_type) {
	case PT_X86:
	case PT_RVI:
		/* The PAT bit is different for PTEs and PDEs. */
		pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;

		if ((pte & pat_flag) != 0)
			pat_idx |= 0x4;
		if ((pte & PG_NC_PCD) != 0)
			pat_idx |= 0x2;
		if ((pte & PG_NC_PWT) != 0)
			pat_idx |= 0x1;
		break;
	case PT_EPT:
		if ((pte & EPT_PG_IGNORE_PAT) != 0)
			panic("EPT PTE %#lx has no PAT memory type", pte);
		pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3;
		break;
	}

	/*
	 * See pmap_init_pat(): PAT entries 4 and 7 are programmed as
	 * duplicates of entries 0 (WB) and 3 (UC), so fold those
	 * aliases onto the canonical indices.
	 */
	if (pat_idx == 4)
		pat_idx = 0;
	if (pat_idx == 7)
		pat_idx = 3;

	return (pat_idx);
}

bool
pmap_ps_enabled(pmap_t pmap)
{

@@ -9981,6 +10017,268 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
	return (error);
}

/*
 * Track a range of the kernel's virtual address space that is contiguous
 * in various mapping attributes.
 */
struct pmap_kernel_map_range {
	vm_offset_t sva;
	pt_entry_t attrs;
	int ptes;	/* 4KB page mappings */
	int pdes;	/* 2MB page mappings */
	int pdpes;	/* 1GB page mappings */
};

static void
sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
    vm_offset_t eva)
{
	const char *mode;
	int i, pat_idx;

	if (eva <= range->sva)
		return;

	pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
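	/*
	 * pat_index[] maps each PAT_* caching mode to the index of the
	 * PAT table entry that encodes it; search it to translate the
	 * PAT index back into a mode.
	 */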
	for (i = 0; i < PAT_INDEX_SIZE; i++)
		if (pat_index[i] == pat_idx)
			break;

	switch (i) {
	case PAT_WRITE_BACK:
		mode = "WB";
		break;
	case PAT_WRITE_THROUGH:
		mode = "WT";
		break;
	case PAT_UNCACHEABLE:
		mode = "UC";
		break;
	case PAT_WRITE_PROTECTED:
		mode = "WP";
		break;
	case PAT_WRITE_COMBINING:
		mode = "WC";
		break;
	default:
		printf("%s: unknown PAT mode %#x for range %#016lx-%#016lx\n",
		    __func__, i, range->sva, eva);
		mode = "??";
		break;
	}

	sbuf_printf(sb, "%#016lx-%#016lx r%c%c%c%c %s %d %d %d\n",
	    range->sva, eva,
	    (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
	    (range->attrs & pg_nx) != 0 ? '-' : 'x',
	    (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
	    (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
	    mode, range->pdpes, range->pdes, range->ptes);

	/* Reset to sentinel value. */
	range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
}

/*
 * Determine whether the attributes specified by a page table entry match those
 * being tracked by the current range.  This is not quite as simple as a direct
 * flag comparison since some PAT modes have multiple representations: with the
 * boot-time PAT programming, for example, PAT indices 0 and 4 both select
 * write-back.
 */
static bool
sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
{
	pt_entry_t diff, mask;

	mask = X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx;
	diff = (range->attrs ^ attrs) & mask;
	if (diff == 0)
		return (true);
	if ((diff & ~X86_PG_PDE_PAT) == 0 &&
	    pmap_pat_index(kernel_pmap, range->attrs, true) ==
	    pmap_pat_index(kernel_pmap, attrs, true))
		return (true);
	return (false);
}

static void
sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
    pt_entry_t attrs)
{

	memset(range, 0, sizeof(*range));
	range->sva = va;
	range->attrs = attrs;
}

/*
 * Given a leaf PTE, derive the mapping's attributes.  If they do not match
 * those of the current run, dump the address range and its attributes, and
 * begin a new run.
 */
static void
sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
    vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
    pt_entry_t pte)
{
	pt_entry_t attrs;

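	/*
	 * A mapping is writable or user-accessible only if the
	 * corresponding bit is set at every level of the hierarchy,
	 * while it is non-executable if pg_nx is set at any level.
	 * Accumulate RW and U/S as an intersection and pg_nx as a
	 * union while descending.
	 */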
	attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);

	attrs |= pdpe & pg_nx;
	attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
	if ((pdpe & PG_PS) != 0) {
		attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE);
	} else if (pde != 0) {
		attrs |= pde & pg_nx;
		attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U));
	}
	if ((pde & PG_PS) != 0) {
		attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE);
	} else if (pte != 0) {
		attrs |= pte & pg_nx;
		attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U));
		attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE);

		/* Canonicalize by always using the PDE PAT bit. */
		if ((attrs & X86_PG_PTE_PAT) != 0)
			attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT;
	}

	if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
		sysctl_kmaps_dump(sb, range, va);
		sysctl_kmaps_reinit(range, va, attrs);
	}
}

static int
sysctl_kmaps(SYSCTL_HANDLER_ARGS)
{
	struct pmap_kernel_map_range range;
	struct sbuf sbuf, *sb;
	pml4_entry_t pml4e;
	pdp_entry_t *pdp, pdpe;
	pd_entry_t *pd, pde;
	pt_entry_t *pt, pte;
	vm_offset_t sva;
	vm_paddr_t pa;
	int error, i, j, k, l;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sb = &sbuf;
	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);

	/* Sentinel value. */
	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);

	/*
	 * Iterate over the kernel page tables without holding the kernel pmap
	 * lock.  Outside of the large map, kernel page table pages are never
	 * freed, so at worst we will observe inconsistencies in the output.
	 * Within the large map, ensure that PDP and PD page addresses are
	 * valid before descending.
	 */
	for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
		switch (i) {
		case PML4PML4I:
			sbuf_printf(sb, "\nRecursive map:\n");
			break;
		case DMPML4I:
			sbuf_printf(sb, "\nDirect map:\n");
			break;
		case KPML4BASE:
			sbuf_printf(sb, "\nKernel map:\n");
			break;
		case LMSPML4I:
			sbuf_printf(sb, "\nLarge map:\n");
			break;
		}

		/*
		 * Convert to canonical form: when crossing into the
		 * upper half of the address space, sign-extend bit 47.
		 */
		if (sva == 1ul << 47)
			sva |= -1ul << 48;

restart:
		pml4e = kernel_pmap->pm_pml4[i];
		if ((pml4e & X86_PG_V) == 0) {
			sva = rounddown2(sva, NBPML4);
			sysctl_kmaps_dump(sb, &range, sva);
			sva += NBPML4;
			continue;
		}
		pa = pml4e & PG_FRAME;
		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa);

		for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) {
			pdpe = pdp[j];
			if ((pdpe & X86_PG_V) == 0) {
				sva = rounddown2(sva, NBPDP);
				sysctl_kmaps_dump(sb, &range, sva);
				sva += NBPDP;
				continue;
			}
			pa = pdpe & PG_FRAME;
			if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
			    vm_phys_paddr_to_vm_page(pa) == NULL)
				goto restart;
			if ((pdpe & PG_PS) != 0) {
				sva = rounddown2(sva, NBPDP);
				sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
				    0, 0);
				range.pdpes++;
				sva += NBPDP;
				continue;
			}
			pd = (pd_entry_t *)PHYS_TO_DMAP(pa);

			for (k = pmap_pde_index(sva); k < NPDEPG; k++) {
				pde = pd[k];
				if ((pde & X86_PG_V) == 0) {
					sva = rounddown2(sva, NBPDR);
					sysctl_kmaps_dump(sb, &range, sva);
					sva += NBPDR;
					continue;
				}
				pa = pde & PG_FRAME;
				if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
				    vm_phys_paddr_to_vm_page(pa) == NULL)
					goto restart;
				if ((pde & PG_PS) != 0) {
					sva = rounddown2(sva, NBPDR);
					sysctl_kmaps_check(sb, &range, sva,
					    pml4e, pdpe, pde, 0);
					range.pdes++;
					sva += NBPDR;
					continue;
				}
				pt = (pt_entry_t *)PHYS_TO_DMAP(pa);

				for (l = pmap_pte_index(sva); l < NPTEPG; l++,
				    sva += PAGE_SIZE) {
					pte = pt[l];
					if ((pte & X86_PG_V) == 0) {
						sysctl_kmaps_dump(sb, &range,
						    sva);
						continue;
					}
					sysctl_kmaps_check(sb, &range, sva,
					    pml4e, pdpe, pde, pte);
					range.ptes++;
				}
			}
		}
	}

	error = sbuf_finish(sb);
	sbuf_delete(sb);
	return (error);
}

SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_kmaps, "A",
    "Dump kernel address layout");

#ifdef DDB
DB_SHOW_COMMAND(pte, pmap_print_pte)
{
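
Besides sysctl(8), the new OID can also be read programmatically. A minimal
userland sketch (not part of the commit; it relies only on the standard
sysctlbyname(3) interface and ignores the race where the output grows between
the two calls):

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buf;
	size_t len;

	/* First call: query the size of the output buffer. */
	if (sysctlbyname("vm.pmap.kernel_maps", NULL, &len, NULL, 0) != 0)
		err(1, "sysctlbyname");
	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");
	/* Second call: fetch the text; len is updated to the actual size. */
	if (sysctlbyname("vm.pmap.kernel_maps", buf, &len, NULL, 0) != 0)
		err(1, "sysctlbyname");
	fwrite(buf, 1, len, stdout);
	free(buf);
	return (0);
}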