Add experimental 16k page support on arm64

Add initial 16k page support on arm64. It is considered experimental,
with no guarantee of compatibility with userspace or kernel modules
built with the current 4k page size, as such code will likely pass in
a size that is too small when working with APIs that take a multiple
of a page, e.g. mmap.
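
For illustration only (this snippet is not part of the change): the
portable pattern is to query the page size at run time rather than
hardcoding 4096, since e.g. munmap() requires a page-aligned address
and will fail with EINVAL when given a 4k-aligned but not 16k-aligned
pointer on a 16k kernel.

#include <sys/mman.h>
#include <unistd.h>

/*
 * Hypothetical example, not part of this commit: trim the first page
 * off an existing anonymous mapping.
 */
static int
trim_first_page(void *base, size_t len)
{
	/*
	 * Hardcoding 4096 here would hand munmap() an address that is
	 * not page aligned on a 16k kernel; query the size instead.
	 */
	size_t ps = (size_t)sysconf(_SC_PAGESIZE);

	return (munmap((char *)base + ps, len - ps));
}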

As this is experimental, and because userspace and the kernel need to
keep the PAGE_SIZE macro in sync, there is no kernel option to enable
this. To test it, a new image should be built with the
PAGE_{SIZE,SHIFT,MASK} macros changed to their 16k versions.
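
As a rough sketch of what such a test build involves (the exact macro
names and header location here are assumptions, not taken from this
change), the arm64 page-size macros would be switched along these
lines:

/*
 * Hypothetical sketch only: switch the page-size macros from their 4k
 * to their 16k values. The real definitions live in the arm64 machine
 * headers and may be spelled differently.
 */
#define	PAGE_SHIFT	14			/* 12 with 4k pages */
#define	PAGE_SIZE	(1 << PAGE_SHIFT)	/* 16384 */
#define	PAGE_MASK	(PAGE_SIZE - 1)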

There are currently known issues with loading modules from an old
loader, as it can load them misaligned, on a non-16k boundary.
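
The requirement on the loader amounts to rounding module load
addresses up to the kernel's page size rather than an assumed 4k. A
minimal sketch, using the roundup2() macro from sys/param.h and a
hypothetical helper name:

#include <sys/param.h>	/* roundup2(), PAGE_SIZE, vm_offset_t */

/*
 * Hypothetical helper, not part of this commit: place a module at the
 * next kernel-page boundary. Rounding to a hardcoded 4096 instead is
 * what produces the misalignment described above on a 16k kernel.
 */
static vm_offset_t
module_align(vm_offset_t addr)
{
	return (roundup2(addr, PAGE_SIZE));
}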

Testing has shown good results in kernel workloads that allocate and
free large amounts of memory, as only a quarter of the number of calls
into the VM subsystem are needed in the best case (for example,
mapping 1 MiB takes 64 16k pages rather than 256 4k pages).

Reviewed by:	markj
Tested by:	gallatin
Sponsored by:	The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D34793
Andrew Turner 2022-03-23 17:39:58 +00:00
parent 54291f7d65
commit 36f1526a59
5 changed files with 277 additions and 64 deletions


@ -39,6 +39,14 @@
#define VIRT_BITS 48
#if PAGE_SIZE == PAGE_SIZE_16K
/*
* The number of level 3 tables to create. 32 will allow for 1G of address
* space, the same as a single level 2 page with 4k pages.
*/
#define L3_PAGE_COUNT 32
#endif
.globl kernbase
.set kernbase, KERNBASE
@ -431,8 +439,13 @@ LENTRY(create_pagetables)
/* Booted with modules pointer */
/* Find modulep - begin */
sub x8, x0, x6
/* Add two 2MiB pages for the module data and round up */
ldr x7, =(3 * L2_SIZE - 1)
/*
* Add space for the module data. When PAGE_SIZE is 4k this will
* add at least 2 level 2 blocks (2 * 2MiB). When PAGE_SIZE is
* larger the reservation will still be at least as large, as we
* map with smaller level 3 pages.
*/
ldr x7, =((6 * 1024 * 1024) - 1)
add x8, x8, x7
b common
@ -457,6 +470,34 @@ booti_no_fdt:
#endif
common:
#if PAGE_SIZE != PAGE_SIZE_4K
/*
* Create L3 pages. The kernel will be loaded at a 2M aligned
* address; however, when the page size is larger than 4k, L2
* blocks are too large to map the kernel with only that alignment.
*/
/* Get the number of l3 pages to allocate, rounded down */
lsr x10, x8, #(L3_SHIFT)
/* Create the kernel space L2 table */
mov x6, x26
mov x7, #(ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
mov x8, #(KERNBASE)
mov x9, x28
bl build_l3_page_pagetable
/* Move to the l2 table */
ldr x9, =(PAGE_SIZE * L3_PAGE_COUNT)
add x26, x26, x9
/* Link the l2 -> l3 table */
mov x9, x6
mov x6, x26
bl link_l2_pagetable
#else
/* Get the number of l2 pages to allocate, rounded down */
lsr x10, x8, #(L2_SHIFT)
@ -466,6 +507,7 @@ common:
mov x8, #(KERNBASE)
mov x9, x28
bl build_l2_block_pagetable
#endif
/* Move to the l1 table */
add x26, x26, #PAGE_SIZE
@ -504,7 +546,8 @@ common:
#if defined(SOCDEV_PA)
/* Create a table for the UART */
mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_DEVICE))
add x16, x16, #(L2_SIZE) /* VA start */
ldr x9, =(L2_SIZE)
add x16, x16, x9 /* VA start */
mov x8, x16
/* Store the socdev virtual address */
@ -523,7 +566,8 @@ common:
/* Create the mapping for FDT data (2 MiB max) */
mov x7, #(ATTR_S1_nG | ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK))
add x16, x16, #(L2_SIZE) /* VA start */
ldr x9, =(L2_SIZE)
add x16, x16, x9 /* VA start */
mov x8, x16
mov x9, x0 /* PA start */
/* Update the module pointer to point at the allocated memory */
@ -662,6 +706,76 @@ LENTRY(build_l2_block_pagetable)
ret
LEND(build_l2_block_pagetable)
#if PAGE_SIZE != PAGE_SIZE_4K
/*
* Builds an L2 -> L3 table descriptor
*
* x6 = L2 table
* x8 = Virtual Address
* x9 = L3 PA (trashed)
* x11, x12 and x13 are trashed
*/
LENTRY(link_l2_pagetable)
/*
* Link an L2 -> L3 table entry.
*/
/* Find the table index */
lsr x11, x8, #L2_SHIFT
and x11, x11, #Ln_ADDR_MASK
/* Build the L2 table entry */
mov x12, #L2_TABLE
/* Only use the output address bits */
lsr x9, x9, #PAGE_SHIFT
orr x13, x12, x9, lsl #PAGE_SHIFT
/* Store the entry */
str x13, [x6, x11, lsl #3]
ret
LEND(link_l2_pagetable)
/*
* Builds count level 3 page table entries
* x6 = L3 table
* x7 = Block attributes
* x8 = VA start
* x9 = PA start (trashed)
* x10 = Entry count (trashed)
* x11, x12 and x13 are trashed
*/
LENTRY(build_l3_page_pagetable)
/*
* Build the L3 table entry.
*/
/* Find the table index */
lsr x11, x8, #L3_SHIFT
and x11, x11, #Ln_ADDR_MASK
/* Build the L3 page entry */
orr x12, x7, #L3_PAGE
orr x12, x12, #(ATTR_DEFAULT)
orr x12, x12, #(ATTR_S1_UXN)
/* Only use the output address bits */
lsr x9, x9, #L3_SHIFT
/* Set the physical address for this virtual address */
1: orr x13, x12, x9, lsl #L3_SHIFT
/* Store the entry */
str x13, [x6, x11, lsl #3]
sub x10, x10, #1
add x11, x11, #1
add x9, x9, #1
cbnz x10, 1b
ret
LEND(build_l3_page_pagetable)
#endif
LENTRY(start_mmu)
dsb sy
@ -743,7 +857,15 @@ mair:
MAIR_ATTR(MAIR_NORMAL_WT, VM_MEMATTR_WRITE_THROUGH) | \
MAIR_ATTR(MAIR_DEVICE_nGnRE, VM_MEMATTR_DEVICE_nGnRE)
tcr:
.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG1_4K | TCR_TG0_4K | \
#if PAGE_SIZE == PAGE_SIZE_4K
#define TCR_TG (TCR_TG1_4K | TCR_TG0_4K)
#elif PAGE_SIZE == PAGE_SIZE_16K
#define TCR_TG (TCR_TG1_16K | TCR_TG0_16K)
#else
#error Unsupported page size
#endif
.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_TG | \
TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
sctlr_set:
/* Bits to set */
@ -774,6 +896,10 @@ END(abort)
*/
.globl pagetable_l0_ttbr1
pagetable:
#if PAGE_SIZE != PAGE_SIZE_4K
.space (PAGE_SIZE * L3_PAGE_COUNT)
pagetable_l2_ttbr1:
#endif
.space PAGE_SIZE
pagetable_l1_ttbr1:
.space PAGE_SIZE


@ -239,7 +239,13 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
mdhdr.dmapbase = DMAP_MIN_ADDRESS;
mdhdr.dmapend = DMAP_MAX_ADDRESS;
mdhdr.dumpavailsize = round_page(sizeof(dump_avail));
#if PAGE_SIZE == PAGE_SIZE_4K
mdhdr.flags = MINIDUMP_FLAG_PS_4K;
#elif PAGE_SIZE == PAGE_SIZE_16K
mdhdr.flags = MINIDUMP_FLAG_PS_16K;
#else
#error Unsupported page size
#endif
dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AARCH64_VERSION,
dumpsize);


@ -286,10 +286,6 @@ vm_paddr_t dmap_phys_base; /* The start of the dmap region */
vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
extern pt_entry_t pagetable_l0_ttbr1[];
#define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1))
@ -299,6 +295,15 @@ static u_int physmap_idx;
static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"VM/pmap parameters");
#if PAGE_SIZE == PAGE_SIZE_4K
#define L1_BLOCKS_SUPPORTED 1
#else
/* TODO: Make this dynamic when we support FEAT_LPA2 (TCR_EL1.DS == 1) */
#define L1_BLOCKS_SUPPORTED 0
#endif
#define PMAP_ASSERT_L1_BLOCKS_SUPPORTED MPASS(L1_BLOCKS_SUPPORTED)
/*
* This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
* that it has currently allocated to a pmap, a cursor ("asid_next") to
@ -571,6 +576,7 @@ pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
}
desc = pmap_load(l1) & ATTR_DESCR_MASK;
if (desc == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
*level = 1;
return (l1);
}
@ -621,9 +627,11 @@ pmap_pte_exists(pmap_t pmap, vm_offset_t va, int level, const char *diag)
if (desc == L0_TABLE && level > 0) {
l1p = pmap_l0_to_l1(l0p, va);
desc = pmap_load(l1p) & ATTR_DESCR_MASK;
if (desc == L1_BLOCK && level == 1)
if (desc == L1_BLOCK && level == 1) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
return (l1p);
else if (desc == L1_TABLE && level > 1) {
}
if (desc == L1_TABLE && level > 1) {
l2p = pmap_l1_to_l2(l1p, va);
desc = pmap_load(l2p) & ATTR_DESCR_MASK;
if (desc == L2_BLOCK && level == 2)
@ -673,6 +681,7 @@ pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
*l1 = l1p;
if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
*l2 = NULL;
*l3 = NULL;
return (true);
@ -1013,29 +1022,36 @@ pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa,
pmap_bootstrap_dmap_l3_page(&state, i);
MPASS(state.pa <= physmap[i + 1]);
/* Create L2 mappings at the start of the region */
if ((state.pa & L1_OFFSET) != 0)
if (L1_BLOCKS_SUPPORTED) {
/* Create L2 mappings at the start of the region */
if ((state.pa & L1_OFFSET) != 0)
pmap_bootstrap_dmap_l2_block(&state, i);
MPASS(state.pa <= physmap[i + 1]);
/* Create the main L1 block mappings */
for (; state.va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state.pa) >= L1_SIZE;
state.va += L1_SIZE, state.pa += L1_SIZE) {
/* Make sure there is a valid L1 table */
pmap_bootstrap_dmap_l0_table(&state);
MPASS((state.pa & L1_OFFSET) == 0);
pmap_store(&state.l1[pmap_l1_index(state.va)],
state.pa | ATTR_DEFAULT | ATTR_S1_XN |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
L1_BLOCK);
}
MPASS(state.pa <= physmap[i + 1]);
/* Create L2 mappings at the end of the region */
pmap_bootstrap_dmap_l2_block(&state, i);
MPASS(state.pa <= physmap[i + 1]);
/* Create the main L1 block mappings */
for (; state.va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state.pa) >= L1_SIZE;
state.va += L1_SIZE, state.pa += L1_SIZE) {
/* Make sure there is a valid L1 table */
pmap_bootstrap_dmap_l0_table(&state);
MPASS((state.pa & L1_OFFSET) == 0);
pmap_store(&state.l1[pmap_l1_index(state.va)],
state.pa | ATTR_DEFAULT | ATTR_S1_XN |
ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
L1_BLOCK);
} else {
while (state.va < DMAP_MAX_ADDRESS &&
(physmap[i + 1] - state.pa) >= L2_SIZE) {
pmap_bootstrap_dmap_l2_block(&state, i);
}
}
MPASS(state.pa <= physmap[i + 1]);
/* Create L2 mappings at the end of the region */
pmap_bootstrap_dmap_l2_block(&state, i);
MPASS(state.pa <= physmap[i + 1]);
/* Create L3 mappings at the end of the region */
pmap_bootstrap_dmap_l3_page(&state, i);
MPASS(state.pa == physmap[i + 1]);
@ -1261,9 +1277,11 @@ pmap_init(void)
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
pagesizes[1] = L2_SIZE;
KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
("pmap_init: can't assign to pagesizes[2]"));
pagesizes[2] = L1_SIZE;
if (L1_BLOCKS_SUPPORTED) {
KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
("pmap_init: can't assign to pagesizes[2]"));
pagesizes[2] = L1_SIZE;
}
}
/*
@ -1483,6 +1501,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
pa = tpte & ~ATTR_MASK;
switch(lvl) {
case 1:
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
("pmap_extract: Invalid L1 pte found: %lx",
tpte & ATTR_DESCR_MASK));
@ -1530,6 +1549,10 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
KASSERT(lvl > 0 && lvl <= 3,
("pmap_extract_and_hold: Invalid level %d", lvl));
/*
* Check that the pte is either a L3 page, or a L1 or L2 block
* entry. We can assume L1_BLOCK == L2_BLOCK.
*/
KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
(lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
@ -2426,8 +2449,13 @@ pmap_growkernel(vm_offset_t addr)
***************************************************/
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
#if PAGE_SIZE == PAGE_SIZE_4K
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);
#else
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 677);
#endif
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
@ -2438,11 +2466,30 @@ pv_to_chunk(pv_entry_t pv)
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
#define PC_FREE0 0xfffffffffffffffful
#define PC_FREE1 0xfffffffffffffffful
#define PC_FREE2 0x000000fffffffffful
#define PC_FREEN 0xfffffffffffffffful
#if _NPCM == 3
#define PC_FREEL 0x000000fffffffffful
#elif _NPCM == 11
#define PC_FREEL 0x0000001ffffffffful
#endif
static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
#if _NPCM == 3
#define PC_IS_FREE(pc) ((pc)->pc_map[0] == PC_FREEN && \
(pc)->pc_map[1] == PC_FREEN && (pc)->pc_map[2] == PC_FREEL)
#else
#define PC_IS_FREE(pc) \
(memcmp((pc)->pc_map, pc_freemask, sizeof(pc_freemask)) == 0)
#endif
static const uint64_t pc_freemask[] = { PC_FREEN, PC_FREEN,
#if _NPCM > 3
PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN, PC_FREEN,
PC_FREEN,
#endif
PC_FREEL
};
CTASSERT(nitems(pc_freemask) == _NPCM);
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
@ -2608,8 +2655,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
pc->pc_map[2] == PC_FREE2) {
if (PC_IS_FREE(pc)) {
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
@ -2678,8 +2724,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv)
field = idx / 64;
bit = idx % 64;
pc->pc_map[field] |= 1ul << bit;
if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
pc->pc_map[2] != PC_FREE2) {
if (!PC_IS_FREE(pc)) {
/* 98% of the time, pc is already at the head of the list. */
if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
@ -2767,9 +2812,8 @@ get_pv_entry(pmap_t pmap, struct rwlock **lockp)
dump_add_page(m->phys_addr);
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
pc->pc_map[0] &= ~1ul; /* preallocated bit 0 */
mtx_lock(&pv_chunks_mutex);
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
@ -2829,9 +2873,7 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
dump_add_page(m->phys_addr);
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
pc->pc_map[0] = PC_FREE0;
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask));
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
@ -3265,6 +3307,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
@ -3518,6 +3561,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
@ -3848,9 +3892,10 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
mp->ref_count++;
}
}
KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
((origpte & ATTR_DESCR_MASK) == L1_BLOCK &&
(origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK)),
KASSERT((origpte & ~ATTR_MASK) == (newpte & ~ATTR_MASK) ||
(L1_BLOCKS_SUPPORTED &&
(origpte & ATTR_DESCR_MASK) == L1_BLOCK &&
(origpte & ATTR_DESCR_VALID) == 0),
("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l1p, newpte);
@ -3980,9 +4025,10 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
new_l3 &= ~L3_PAGE;
if (psind == 2)
if (psind == 2) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
new_l3 |= L1_BLOCK;
else /* (psind == 1) */
} else /* (psind == 1) */
new_l3 |= L2_BLOCK;
rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
goto out;
@ -4660,6 +4706,7 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
@ -4772,6 +4819,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= end_addr,
("partial update of non-transparent 1G page "
"l1 %#lx addr %#lx end_addr %#lx va_next %#lx",
@ -5730,6 +5778,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
if (pmap_load(l1) == 0)
continue;
if ((pmap_load(l1) & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(va_next <= eva,
("partial update of non-transparent 1G page "
"l1 %#lx sva %#lx eva %#lx va_next %#lx",
@ -6243,6 +6292,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot,
default:
panic("Invalid DMAP table level: %d\n", lvl);
case 1:
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
if ((tmpva & L1_OFFSET) == 0 &&
(base + size - tmpva) >= L1_SIZE) {
pte_size = L1_SIZE;
@ -6318,6 +6368,7 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldl1 = pmap_load(l1);
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
("pmap_demote_l1: Demoting a non-block entry"));
KASSERT((va & L1_OFFSET) == 0,
@ -7400,6 +7451,7 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS)
continue;
}
if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
sysctl_kmaps_check(sb, &range, sva, l0e, l1e,
0, 0);
range.l1blocks++;


@ -106,8 +106,18 @@ typedef struct pv_entry {
* pv_entries are allocated in chunks per-process. This avoids the
* need to track per-pmap assignments.
*/
#if PAGE_SIZE == PAGE_SIZE_4K
#define _NPCM 3
#define _NPCPV 168
#define _NPAD 0
#elif PAGE_SIZE == PAGE_SIZE_16K
#define _NPCM 11
#define _NPCPV 677
#define _NPAD 1
#else
#error Unsupported page size
#endif
#define PV_CHUNK_HEADER \
pmap_t pc_pmap; \
TAILQ_ENTRY(pv_chunk) pc_list; \
@ -121,6 +131,7 @@ struct pv_chunk_header {
struct pv_chunk {
PV_CHUNK_HEADER
struct pv_entry pc_pventry[_NPCPV];
uint64_t pc_pad[_NPAD];
};
struct thread;


@ -109,33 +109,43 @@ typedef uint64_t pt_entry_t; /* page table entry */
#define ATTR_DESCR_TYPE_PAGE 2
#define ATTR_DESCR_TYPE_BLOCK 0
/* Level 0 table, 512GiB per entry */
#if PAGE_SIZE == PAGE_SIZE_4K
#define L0_SHIFT 39
#define L0_SIZE (1ul << L0_SHIFT)
#define L1_SHIFT 30
#define L2_SHIFT 21
#define L3_SHIFT 12
#elif PAGE_SIZE == PAGE_SIZE_16K
#define L0_SHIFT 47
#define L1_SHIFT 36
#define L2_SHIFT 25
#define L3_SHIFT 14
#else
#error Unsupported page size
#endif
/* Level 0 table, 512GiB/128TiB per entry */
#define L0_SIZE (UINT64_C(1) << L0_SHIFT)
#define L0_OFFSET (L0_SIZE - 1ul)
#define L0_INVAL 0x0 /* An invalid address */
/* 0x1 Level 0 doesn't support block translation */
/* 0x2 also marks an invalid address */
#define L0_TABLE 0x3 /* A next-level table */
/* Level 1 table, 1GiB per entry */
#define L1_SHIFT 30
#define L1_SIZE (1 << L1_SHIFT)
/* Level 1 table, 1GiB/64GiB per entry */
#define L1_SIZE (UINT64_C(1) << L1_SHIFT)
#define L1_OFFSET (L1_SIZE - 1)
#define L1_INVAL L0_INVAL
#define L1_BLOCK 0x1
#define L1_TABLE L0_TABLE
/* Level 2 table, 2MiB per entry */
#define L2_SHIFT 21
#define L2_SIZE (1 << L2_SHIFT)
/* Level 2 table, 2MiB/32MiB per entry */
#define L2_SIZE (UINT64_C(1) << L2_SHIFT)
#define L2_OFFSET (L2_SIZE - 1)
#define L2_INVAL L1_INVAL
#define L2_BLOCK L1_BLOCK
#define L2_BLOCK 0x1
#define L2_TABLE L1_TABLE
/* Level 3 table, 4KiB per entry */
#define L3_SHIFT 12
/* Level 3 table, 4KiB/16KiB per entry */
#define L3_SIZE (1 << L3_SHIFT)
#define L3_OFFSET (L3_SIZE - 1)
#define L3_INVAL 0x0
@ -145,11 +155,19 @@ typedef uint64_t pt_entry_t; /* page table entry */
#define PMAP_MAPDEV_EARLY_SIZE (L2_SIZE * 8)
#if PAGE_SIZE == PAGE_SIZE_4K
#define L0_ENTRIES_SHIFT 9
#define Ln_ENTRIES_SHIFT 9
#elif PAGE_SIZE == PAGE_SIZE_16K
#define L0_ENTRIES_SHIFT 1
#define Ln_ENTRIES_SHIFT 11
#else
#error Unsupported page size
#endif
#define L0_ENTRIES (1 << L0_ENTRIES_SHIFT)
#define L0_ADDR_MASK (L0_ENTRIES - 1)
#define Ln_ENTRIES_SHIFT 9
#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT)
#define Ln_ADDR_MASK (Ln_ENTRIES - 1)
#define Ln_TABLE_MASK ((1 << 12) - 1)