libkvm: Improve physical address lookup scaling.

Instead of using a hash table to convert physical page addresses to offsets
in the sparse page array, cache the number of bits set for each 4MB chunk of
physical pages.  Upon lookup, find the nearest cached population count, then
add/subtract the number of bits from that point to the page's PTE bit.
Then multiply by page size and add to the sparse page map's base offset.

This replaces O(n) worst-case lookup with O(1) (plus a small number of bits
to scan in the bitmap).  Also, for a 128GB system, a typical kernel core of
about 8GB will now only require ~4.5MB of RAM for this approach instead of
~48MB as with the hash table.

More concretely, /usr/sbin/crashinfo against the same core improves from a
max RSS of 188MB and wall time of 43.72s (33.25 user 2.94 sys) to 135MB and
9.43s (2.58 user 1.47 sys).  Running "thread apply all bt" in kgdb has a
similar RSS improvement, and wall time drops from 4.44s to 1.93s.

Reviewed by:	jhb
Sponsored by:	Backtrace I/O
This commit is contained in:
Will Andrews 2016-07-18 01:55:25 +00:00
parent 8fb15a24ce
commit ffdeef3234
8 changed files with 233 additions and 211 deletions

View File

@ -283,6 +283,8 @@ kvm_close(kvm_t *kd)
free((void *) kd->argspc);
if (kd->argv != 0)
free((void *)kd->argv);
if (kd->pt_map != NULL)
free(kd->pt_map);
free((void *)kd);
return (0);

View File

@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
struct hpt hpt;
uint64_t *page_map;
};
@ -67,7 +66,6 @@ _aarch64_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
_kvm_hpt_free(&vm->hpt);
free(vm->page_map);
free(vm);
kd->vmst = NULL;
@ -77,8 +75,7 @@ static int
_aarch64_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
uint64_t *bitmap;
off_t off;
off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@ -114,19 +111,12 @@ _aarch64_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = AARCH64_PAGE_SIZE + aarch64_round_page(vmst->hdr.msgbufsize);
bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
if (bitmap == NULL) {
_kvm_err(kd, kd->program,
"cannot allocate %d bytes for bitmap",
vmst->hdr.bitmapsize);
return (-1);
}
if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
(ssize_t)vmst->hdr.bitmapsize) {
_kvm_err(kd, kd->program,
"cannot read %d bytes for page bitmap",
vmst->hdr.bitmapsize);
free(bitmap);
/* build physical address lookup table for sparse pages */
sparse_off = off + aarch64_round_page(vmst->hdr.bitmapsize) +
aarch64_round_page(vmst->hdr.pmapsize);
if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
AARCH64_PAGE_SIZE, sizeof(uint64_t)) == -1) {
_kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += aarch64_round_page(vmst->hdr.bitmapsize);
@ -136,7 +126,6 @@ _aarch64_minidump_initvtop(kvm_t *kd)
_kvm_err(kd, kd->program,
"cannot allocate %d bytes for page_map",
vmst->hdr.pmapsize);
free(bitmap);
return (-1);
}
/* This is the end of the dump, savecore may have truncated it. */
@ -149,15 +138,9 @@ _aarch64_minidump_initvtop(kvm_t *kd)
AARCH64_PAGE_SIZE) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page_map",
vmst->hdr.pmapsize);
free(bitmap);
return (-1);
}
off += vmst->hdr.pmapsize;
/* build physical address hash table for sparse pages */
_kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
AARCH64_PAGE_SIZE, sizeof(*bitmap));
free(bitmap);
off += aarch64_round_page(vmst->hdr.pmapsize);
return (0);
}
@ -178,7 +161,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase + vm->hdr.dmapphys) &
~AARCH64_PAGE_MASK;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_aarch64_minidump_vatop: "
"direct map address 0x%jx not in minidump",
@ -198,7 +181,7 @@ _aarch64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = l3 & ~AARCH64_ATTR_MASK;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_aarch64_minidump_vatop: "
"physical address 0x%jx not in minidump",

View File

@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
struct hpt hpt;
amd64_pte_t *page_map;
};
@ -66,9 +65,7 @@ _amd64_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
_kvm_hpt_free(&vm->hpt);
if (vm->page_map)
free(vm->page_map);
free(vm->page_map);
free(vm);
kd->vmst = NULL;
}
@ -77,8 +74,7 @@ static int
_amd64_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
uint64_t *bitmap;
off_t off;
off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@ -116,37 +112,28 @@ _amd64_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = AMD64_PAGE_SIZE + amd64_round_page(vmst->hdr.msgbufsize);
bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
if (bitmap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize);
return (-1);
}
if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
(ssize_t)vmst->hdr.bitmapsize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize);
free(bitmap);
sparse_off = off + amd64_round_page(vmst->hdr.bitmapsize) +
amd64_round_page(vmst->hdr.pmapsize);
if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
AMD64_PAGE_SIZE, sizeof(uint64_t)) == -1) {
_kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += amd64_round_page(vmst->hdr.bitmapsize);
vmst->page_map = _kvm_malloc(kd, vmst->hdr.pmapsize);
if (vmst->page_map == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map", vmst->hdr.pmapsize);
free(bitmap);
_kvm_err(kd, kd->program, "cannot allocate %d bytes for page_map",
vmst->hdr.pmapsize);
return (-1);
}
if (pread(kd->pmfd, vmst->page_map, vmst->hdr.pmapsize, off) !=
(ssize_t)vmst->hdr.pmapsize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page_map", vmst->hdr.pmapsize);
free(bitmap);
_kvm_err(kd, kd->program, "cannot read %d bytes for page_map",
vmst->hdr.pmapsize);
return (-1);
}
off += vmst->hdr.pmapsize;
/* build physical address hash table for sparse pages */
_kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
AMD64_PAGE_SIZE, sizeof(*bitmap));
free(bitmap);
off += amd64_round_page(vmst->hdr.pmapsize);
return (0);
}
@ -175,7 +162,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & AMD64_PG_FRAME;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop_v1: physical address 0x%jx not in minidump",
@ -186,7 +173,7 @@ _amd64_minidump_vatop_v1(kvm_t *kd, kvaddr_t va, off_t *pa)
return (AMD64_PAGE_SIZE - offset);
} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop_v1: direct map address 0x%jx not in minidump",
@ -235,20 +222,20 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
}
if ((pde & AMD64_PG_PS) == 0) {
a = pde & AMD64_PG_FRAME;
ofs = _kvm_hpt_find(&vm->hpt, a);
/* TODO: Just read the single PTE */
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop: pt physical address 0x%jx not in minidump",
"cannot find page table entry for %ju",
(uintmax_t)a);
goto invalid;
}
/* TODO: Just read the single PTE */
if (pread(kd->pmfd, &pt, AMD64_PAGE_SIZE, ofs) !=
AMD64_PAGE_SIZE) {
_kvm_err(kd, kd->program,
"cannot read %d bytes for page table",
AMD64_PAGE_SIZE);
return (-1);
"cannot read page table entry for %ju",
(uintmax_t)a);
goto invalid;
}
pteindex = (va >> AMD64_PAGE_SHIFT) &
(AMD64_NPTEPG - 1);
@ -263,7 +250,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
a = pde & AMD64_PG_PS_FRAME;
a += (va & AMD64_PDRMASK) ^ offset;
}
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop: physical address 0x%jx not in minidump",
@ -274,7 +261,7 @@ _amd64_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
return (AMD64_PAGE_SIZE - offset);
} else if (va >= vm->hdr.dmapbase && va < vm->hdr.dmapend) {
a = (va - vm->hdr.dmapbase) & ~AMD64_PAGE_MASK;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_amd64_minidump_vatop: direct map address 0x%jx not in minidump",

View File

@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
struct hpt hpt;
void *ptemap;
unsigned char ei_data;
};
@ -69,9 +68,7 @@ _arm_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
_kvm_hpt_free(&vm->hpt);
if (vm->ptemap)
free(vm->ptemap);
free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@ -80,8 +77,7 @@ static int
_arm_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
uint32_t *bitmap;
off_t off;
off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@ -122,18 +118,11 @@ _arm_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = ARM_PAGE_SIZE + arm_round_page(vmst->hdr.msgbufsize);
bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
if (bitmap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"bitmap", vmst->hdr.bitmapsize);
return (-1);
}
if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
(ssize_t)vmst->hdr.bitmapsize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap",
vmst->hdr.bitmapsize);
free(bitmap);
sparse_off = off + arm_round_page(vmst->hdr.bitmapsize) +
arm_round_page(vmst->hdr.ptesize);
if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
ARM_PAGE_SIZE, sizeof(uint32_t)) == -1) {
_kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += arm_round_page(vmst->hdr.bitmapsize);
@ -142,7 +131,6 @@ _arm_minidump_initvtop(kvm_t *kd)
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"ptemap", vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
@ -150,16 +138,9 @@ _arm_minidump_initvtop(kvm_t *kd)
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap",
vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
off += vmst->hdr.ptesize;
/* Build physical address hash table for sparse pages */
_kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
ARM_PAGE_SIZE, sizeof(*bitmap));
free(bitmap);
off += arm_round_page(vmst->hdr.ptesize);
return (0);
}
@ -209,7 +190,7 @@ _arm_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
a = pte & ARM_L2_S_FRAME;
}
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_arm_minidump_kvatop: "
"physical address 0x%jx not in minidump",

View File

@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
struct hpt hpt;
void *ptemap;
};
@ -66,9 +65,7 @@ _i386_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
_kvm_hpt_free(&vm->hpt);
if (vm->ptemap)
free(vm->ptemap);
free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@ -77,8 +74,7 @@ static int
_i386_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
uint32_t *bitmap;
off_t off;
off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@ -110,15 +106,11 @@ _i386_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = I386_PAGE_SIZE + i386_round_page(vmst->hdr.msgbufsize);
bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
if (bitmap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for bitmap", vmst->hdr.bitmapsize);
return (-1);
}
if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
(ssize_t)vmst->hdr.bitmapsize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap", vmst->hdr.bitmapsize);
free(bitmap);
sparse_off = off + i386_round_page(vmst->hdr.bitmapsize) +
i386_round_page(vmst->hdr.ptesize);
if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
I386_PAGE_SIZE, sizeof(uint32_t)) == -1) {
_kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += i386_round_page(vmst->hdr.bitmapsize);
@ -126,21 +118,14 @@ _i386_minidump_initvtop(kvm_t *kd)
vmst->ptemap = _kvm_malloc(kd, vmst->hdr.ptesize);
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for ptemap", vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
if (pread(kd->pmfd, vmst->ptemap, vmst->hdr.ptesize, off) !=
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap", vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
off += vmst->hdr.ptesize;
/* build physical address hash table for sparse pages */
_kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
I386_PAGE_SIZE, sizeof(*bitmap));
free(bitmap);
off += i386_round_page(vmst->hdr.ptesize);
return (0);
}
@ -171,7 +156,7 @@ _i386_minidump_vatop_pae(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & I386_PG_FRAME_PAE;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_i386_minidump_vatop_pae: physical address 0x%jx not in minidump",
@ -218,7 +203,7 @@ _i386_minidump_vatop(kvm_t *kd, kvaddr_t va, off_t *pa)
goto invalid;
}
a = pte & I386_PG_FRAME;
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program,
"_i386_minidump_vatop: physical address 0x%jx not in minidump",

View File

@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$");
struct vmstate {
struct minidumphdr hdr;
struct hpt hpt;
void *ptemap;
int pte_size;
};
@ -74,9 +73,7 @@ _mips_minidump_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
_kvm_hpt_free(&vm->hpt);
if (vm->ptemap)
free(vm->ptemap);
free(vm->ptemap);
free(vm);
kd->vmst = NULL;
}
@ -85,8 +82,7 @@ static int
_mips_minidump_initvtop(kvm_t *kd)
{
struct vmstate *vmst;
uint32_t *bitmap;
off_t off;
off_t off, sparse_off;
vmst = _kvm_malloc(kd, sizeof(*vmst));
if (vmst == NULL) {
@ -129,18 +125,11 @@ _mips_minidump_initvtop(kvm_t *kd)
/* Skip header and msgbuf */
off = MIPS_PAGE_SIZE + mips_round_page(vmst->hdr.msgbufsize);
bitmap = _kvm_malloc(kd, vmst->hdr.bitmapsize);
if (bitmap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"bitmap", vmst->hdr.bitmapsize);
return (-1);
}
if (pread(kd->pmfd, bitmap, vmst->hdr.bitmapsize, off) !=
(ssize_t)vmst->hdr.bitmapsize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for page bitmap",
vmst->hdr.bitmapsize);
free(bitmap);
sparse_off = off + mips_round_page(vmst->hdr.bitmapsize) +
mips_round_page(vmst->hdr.ptesize);
if (_kvm_pt_init(kd, vmst->hdr.bitmapsize, off, sparse_off,
MIPS_PAGE_SIZE, sizeof(uint32_t)) == -1) {
_kvm_err(kd, kd->program, "cannot load core bitmap");
return (-1);
}
off += mips_round_page(vmst->hdr.bitmapsize);
@ -149,7 +138,6 @@ _mips_minidump_initvtop(kvm_t *kd)
if (vmst->ptemap == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %d bytes for "
"ptemap", vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
@ -157,16 +145,9 @@ _mips_minidump_initvtop(kvm_t *kd)
(ssize_t)vmst->hdr.ptesize) {
_kvm_err(kd, kd->program, "cannot read %d bytes for ptemap",
vmst->hdr.ptesize);
free(bitmap);
return (-1);
}
off += vmst->hdr.ptesize;
/* Build physical address hash table for sparse pages */
_kvm_hpt_init(kd, &vmst->hpt, bitmap, vmst->hdr.bitmapsize, off,
MIPS_PAGE_SIZE, sizeof(*bitmap));
free(bitmap);
off += mips_round_page(vmst->hdr.ptesize);
return (0);
}
@ -243,7 +224,7 @@ _mips_minidump_kvatop(kvm_t *kd, kvaddr_t va, off_t *pa)
}
found:
ofs = _kvm_hpt_find(&vm->hpt, a);
ofs = _kvm_pt_find(kd, a);
if (ofs == -1) {
_kvm_err(kd, kd->program, "_mips_minidump_kvatop: physical "
"address 0x%jx not in minidump", (uintmax_t)a);

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <assert.h>
#include <fcntl.h>
#include <kvm.h>
#include <limits.h>
@ -213,73 +214,178 @@ bad:
return (-1);
}
static void
_kvm_hpt_insert(struct hpt *hpt, uint64_t pa, off_t off)
/*
* Transform v such that only bits [bit0, bitN) may be set. Generates a
* bitmask covering the number of bits, then shifts so +bit0+ is the first.
*/
static uint64_t
bitmask_range(uint64_t v, uint64_t bit0, uint64_t bitN)
{
struct hpte *hpte;
uint32_t fnv = FNV1_32_INIT;
if (bit0 == 0 && bitN == BITS_IN(v))
return (v);
fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
fnv &= (HPT_SIZE - 1);
hpte = malloc(sizeof(*hpte));
hpte->pa = pa;
hpte->off = off;
hpte->next = hpt->hpt_head[fnv];
hpt->hpt_head[fnv] = hpte;
return (v & (((1ULL << (bitN - bit0)) - 1ULL) << bit0));
}
void
_kvm_hpt_init(kvm_t *kd, struct hpt *hpt, void *base, size_t len, off_t off,
/*
* Returns the number of bits in a given byte array range starting at a
* given base, from bit0 to bitN. bit0 may be non-zero in the case of
* counting backwards from bitN.
*/
static uint64_t
popcount_bytes(uint64_t *addr, uint32_t bit0, uint32_t bitN)
{
uint32_t res = bitN - bit0;
uint64_t count = 0;
uint32_t bound;
/* Align to 64-bit boundary on the left side if needed. */
if ((bit0 % BITS_IN(*addr)) != 0) {
bound = MIN(bitN, roundup2(bit0, BITS_IN(*addr)));
count += __bitcount64(bitmask_range(*addr, bit0, bound));
res -= (bound - bit0);
addr++;
}
while (res > 0) {
bound = MIN(res, BITS_IN(*addr));
count += __bitcount64(bitmask_range(*addr, 0, bound));
res -= bound;
addr++;
}
return (count);
}
int
_kvm_pt_init(kvm_t *kd, size_t map_len, off_t map_off, off_t sparse_off,
int page_size, int word_size)
{
uint64_t bits, idx, pa;
uint64_t *base64;
uint32_t *base32;
uint64_t *addr;
uint32_t *popcount_bin;
int bin_popcounts = 0;
uint64_t pc_bins, res;
ssize_t rd;
base64 = base;
base32 = base;
for (idx = 0; idx < len / word_size; idx++) {
if (word_size == sizeof(uint64_t))
bits = _kvm64toh(kd, base64[idx]);
else
bits = _kvm32toh(kd, base32[idx]);
pa = idx * word_size * NBBY * page_size;
for (; bits != 0; bits >>= 1, pa += page_size) {
if ((bits & 1) == 0)
continue;
_kvm_hpt_insert(hpt, pa, off);
off += page_size;
/*
* Map the bitmap specified by the arguments.
*/
kd->pt_map = _kvm_malloc(kd, map_len);
if (kd->pt_map == NULL) {
_kvm_err(kd, kd->program, "cannot allocate %zu bytes for bitmap",
map_len);
return (-1);
}
rd = pread(kd->pmfd, kd->pt_map, map_len, map_off);
if (rd < 0 || rd != (ssize_t)map_len) {
_kvm_err(kd, kd->program, "cannot read %zu bytes for bitmap",
map_len);
return (-1);
}
kd->pt_map_size = map_len;
/*
* Generate a popcount cache for every POPCOUNT_BITS in the bitmap,
* so lookups only have to calculate the number of bits set between
* a cache point and their bit. This reduces lookups to O(1),
* without significantly increasing memory requirements.
*
* Round up the number of bins so that 'upper half' lookups work for
* the final bin, if needed. The first popcount is 0, since no bits
* precede bit 0, so add 1 for that also. Without this, extra work
* would be needed to handle the first PTEs in _kvm_pt_find().
*/
addr = kd->pt_map;
res = map_len;
pc_bins = 1 + (res * NBBY + POPCOUNT_BITS / 2) / POPCOUNT_BITS;
kd->pt_popcounts = calloc(pc_bins, sizeof(uint32_t));
if (kd->pt_popcounts == NULL)
return (-1);
for (popcount_bin = &kd->pt_popcounts[1]; res > 0;
addr++, res -= sizeof(*addr)) {
*popcount_bin += popcount_bytes(addr, 0,
MIN(res * NBBY, BITS_IN(*addr)));
if (++bin_popcounts == POPCOUNTS_IN(*addr)) {
popcount_bin++;
*popcount_bin = *(popcount_bin - 1);
bin_popcounts = 0;
}
}
assert(pc_bins * sizeof(*popcount_bin) ==
((uintptr_t)popcount_bin - (uintptr_t)kd->pt_popcounts));
kd->pt_sparse_off = sparse_off;
kd->pt_sparse_size = (uint64_t)*popcount_bin * PAGE_SIZE;
kd->pt_page_size = page_size;
kd->pt_word_size = word_size;
return (0);
}
/*
* Find the offset for the given physical page address; returns -1 otherwise.
*
* A page's offset is represented by the sparse page base offset plus the
* number of bits set before its bit multiplied by PAGE_SIZE. This means
* that if a page exists in the dump, it's necessary to know how many pages
* in the dump precede it. Reduce this O(n) counting to O(1) by caching the
* number of bits set at POPCOUNT_BITS intervals.
*
* Then to find the number of pages before the requested address, simply
* index into the cache and count the number of bits set between that cache
* bin and the page's bit. Halve the number of bytes that have to be
* checked by also counting down from the next higher bin if it's closer.
*/
off_t
_kvm_hpt_find(struct hpt *hpt, uint64_t pa)
_kvm_pt_find(kvm_t *kd, uint64_t pa)
{
struct hpte *hpte;
uint32_t fnv = FNV1_32_INIT;
uint64_t *bitmap = kd->pt_map;
uint64_t pte_bit_id = pa / PAGE_SIZE;
uint64_t pte_u64 = pte_bit_id / BITS_IN(*bitmap);
uint64_t popcount_id = pte_bit_id / POPCOUNT_BITS;
uint64_t pte_mask = 1ULL << (pte_bit_id % BITS_IN(*bitmap));
uint64_t bitN;
uint32_t count;
fnv = fnv_32_buf(&pa, sizeof(pa), fnv);
fnv &= (HPT_SIZE - 1);
for (hpte = hpt->hpt_head[fnv]; hpte != NULL; hpte = hpte->next) {
if (pa == hpte->pa)
return (hpte->off);
/* Check whether the page address requested is in the dump. */
if (pte_bit_id >= (kd->pt_map_size * NBBY) ||
(bitmap[pte_u64] & pte_mask) == 0)
return (-1);
/*
* Add/sub popcounts from the bitmap until the PTE's bit is reached.
* For bits that are in the upper half between the calculated
* popcount id and the next one, use the next one and subtract to
* minimize the number of popcounts required.
*/
if ((pte_bit_id % POPCOUNT_BITS) < (POPCOUNT_BITS / 2)) {
count = kd->pt_popcounts[popcount_id] + popcount_bytes(
bitmap + popcount_id * POPCOUNTS_IN(*bitmap),
0, pte_bit_id - popcount_id * POPCOUNT_BITS);
} else {
/*
* Counting in reverse is trickier, since we must avoid
* reading from bytes that are not in range, and invert.
*/
uint64_t pte_u64_bit_off = pte_u64 * BITS_IN(*bitmap);
popcount_id++;
bitN = MIN(popcount_id * POPCOUNT_BITS,
kd->pt_map_size * BITS_IN(uint8_t));
count = kd->pt_popcounts[popcount_id] - popcount_bytes(
bitmap + pte_u64,
pte_bit_id - pte_u64_bit_off, bitN - pte_u64_bit_off);
}
return (-1);
}
void
_kvm_hpt_free(struct hpt *hpt)
{
struct hpte *hpte, *next;
int i;
/*
* This can only happen if the core is truncated. Treat these
* entries as if they don't exist, since their backing doesn't.
*/
if (count >= (kd->pt_sparse_size / PAGE_SIZE))
return (-1);
for (i = 0; i < HPT_SIZE; i++) {
for (hpte = hpt->hpt_head[i]; hpte != NULL; hpte = next) {
next = hpte->next;
free(hpte);
}
}
return (kd->pt_sparse_off + (uint64_t)count * PAGE_SIZE);
}
static int

View File

@ -97,23 +97,21 @@ struct __kvm {
uintptr_t *dpcpu_off; /* base array, indexed by CPU ID */
u_int dpcpu_curcpu; /* CPU we're currently working with */
kvaddr_t dpcpu_curoff; /* dpcpu base of current CPU */
/* Page table lookup structures. */
uint64_t *pt_map;
size_t pt_map_size;
off_t pt_sparse_off;
uint64_t pt_sparse_size;
uint32_t *pt_popcounts;
unsigned int pt_page_size;
unsigned int pt_word_size;
};
/*
* Page table hash used by minidump backends to map physical addresses
* to file offsets.
*/
struct hpte {
struct hpte *next;
uint64_t pa;
off_t off;
};
#define HPT_SIZE 1024
struct hpt {
struct hpte *hpt_head[HPT_SIZE];
};
/* Page table lookup constants. */
#define POPCOUNT_BITS 1024
#define BITS_IN(v) (sizeof(v) * NBBY)
#define POPCOUNTS_IN(v) (POPCOUNT_BITS / BITS_IN(v))
/*
* Functions used internally by kvm, but across kvm modules.
@ -154,6 +152,5 @@ kvaddr_t _kvm_dpcpu_validaddr(kvm_t *, kvaddr_t);
int _kvm_probe_elf_kernel(kvm_t *, int, int);
int _kvm_is_minidump(kvm_t *);
int _kvm_read_core_phdrs(kvm_t *, size_t *, GElf_Phdr **);
void _kvm_hpt_init(kvm_t *, struct hpt *, void *, size_t, off_t, int, int);
off_t _kvm_hpt_find(struct hpt *, uint64_t);
void _kvm_hpt_free(struct hpt *);
int _kvm_pt_init(kvm_t *, size_t, off_t, off_t, int, int);
off_t _kvm_pt_find(kvm_t *, uint64_t);