amd64 pmap: LA57 AKA 5-level paging

Since LA57 was moved into the main SDM document with revision 072, it
seems that we should have support for it, and silicon is coming.

This patch makes pmap support both LA48 and LA57 hardware.  The
selection of page table level is done at startup, kernel always
receives control from loader with 4-level paging.  It is not clear how
UEFI spec would adapt LA57, for instance it could hand out control in
LA57 mode sometimes.

To switch from LA48 to LA57 requires turning off long mode, requesting
LA57 in CR4, then re-entering long mode.  This is somewhat delicate
and done in pmap_bootstrap_la57().  AP startup in LA57 mode is much
easier, we only need to toggle a bit in CR4 and load the right value into CR3.

I decided to not change kernel map for now.  Single PML5 entry is
created that points to the existing kernel_pml4 (KML4Phys) page, and a
pml5 entry to create our recursive mapping for vtopte()/vtopde().
This decision is motivated by the fact that we cannot overcommit for
KVA, so large space there is unusable until machines start providing
wider physical memory addressing.  Another reason is that I do not
want to break our fragile autotuning, so the KVA expansion is not
included into this first step.  Nice side effect is that minidumps are
compatible.

On the other hand, (very) large address space is definitely
immediately useful for some userspace applications.

For userspace, numbering of pte entries (or page table pages) is
always done for 5-level structures even if we operate in 4-level mode.
The pmap_is_la57() function is added to report the mode of the
specified pmap; this is done not to allow simultaneous 4-/5-level
pmaps (which the hardware does not allow), but to accommodate EPT,
which has separate level control and in principle might not allow
5-level EPT even though x86 paging supports it.  Anyway, it does not
seem critical to have 5-level EPT support now.

Tested by:	pho (LA48 hardware)
Reviewed by:	alc
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D25273
This commit is contained in:
Konstantin Belousov 2020-08-23 20:19:04 +00:00
parent 4ba405dcdb
commit 9ce875d9b5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=364527
15 changed files with 951 additions and 227 deletions

View File

@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
#include <machine/fpu.h>
#include <machine/md_var.h>
struct sysentvec elf64_freebsd_sysvec = {
struct sysentvec elf64_freebsd_sysvec_la48 = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
.sv_errsize = 0,
@ -64,9 +64,9 @@ struct sysentvec elf64_freebsd_sysvec = {
.sv_imgact_try = NULL,
.sv_minsigstksz = MINSIGSTKSZ,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS,
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
.sv_usrstack = USRSTACK_LA48,
.sv_psstrings = PS_STRINGS_LA48,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs),
.sv_copyout_strings = exec_copyout_strings,
@ -78,14 +78,64 @@ struct sysentvec elf64_freebsd_sysvec = {
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
.sv_shared_page_base = SHAREDPAGE,
.sv_shared_page_base = SHAREDPAGE_LA48,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = NULL,
.sv_thread_detach = NULL,
.sv_trap = NULL,
.sv_stackgap = elf64_stackgap,
};
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
/*
 * Sysentvec for native 64-bit binaries executed with 5-level paging
 * (LA57).  Differs from the LA48 vector only in the user VA limits:
 * sv_maxuser, sv_usrstack, sv_psstrings and sv_shared_page_base all
 * use the *_LA57 variants.
 */
struct sysentvec elf64_freebsd_sysvec_la57 = {
	.sv_size = SYS_MAXSYSCALL,
	.sv_table = sysent,
	.sv_errsize = 0,
	.sv_errtbl = NULL,
	.sv_transtrap = NULL,
	.sv_fixup = __elfN(freebsd_fixup),
	.sv_sendsig = sendsig,
	.sv_sigcode = sigcode,
	.sv_szsigcode = &szsigcode,
	.sv_name = "FreeBSD ELF64",
	.sv_coredump = __elfN(coredump),
	.sv_imgact_try = NULL,
	.sv_minsigstksz = MINSIGSTKSZ,
	.sv_minuser = VM_MIN_ADDRESS,
	.sv_maxuser = VM_MAXUSER_ADDRESS_LA57,	/* top of the 56-bit user VA */
	.sv_usrstack = USRSTACK_LA57,
	.sv_psstrings = PS_STRINGS_LA57,
	.sv_stackprot = VM_PROT_ALL,
	.sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs),
	.sv_copyout_strings = exec_copyout_strings,
	.sv_setregs = exec_setregs,
	.sv_fixlimit = NULL,
	.sv_maxssiz = NULL,
	.sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
	    SV_TIMEKEEP,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
	.sv_syscallnames = syscallnames,
	.sv_shared_page_base = SHAREDPAGE_LA57,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail = NULL,
	.sv_thread_detach = NULL,
	.sv_trap = NULL,
	.sv_stackgap = elf64_stackgap,
};
/*
 * Initialize the native 64-bit sysentvecs.  The LA48 shared-page
 * fixup must run before exec_sysvec_init() consumes
 * sv_shared_page_base.  When the CPU runs with 5-level paging, the
 * LA57 vector is primary and the LA48 vector is initialized as its
 * secondary; otherwise only the LA48 vector is set up.
 */
static void
amd64_init_sysvecs(void *arg)
{
	amd64_lower_shared_page(&elf64_freebsd_sysvec_la48);
	if (!la57) {
		exec_sysvec_init(&elf64_freebsd_sysvec_la48);
		return;
	}
	exec_sysvec_init(&elf64_freebsd_sysvec_la57);
	exec_sysvec_init_secondary(&elf64_freebsd_sysvec_la57,
	    &elf64_freebsd_sysvec_la48);
}
SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, amd64_init_sysvecs, NULL);
void
amd64_lower_shared_page(struct sysentvec *sv)
@ -98,29 +148,57 @@ amd64_lower_shared_page(struct sysentvec *sv)
}
}
/*
* Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter
* uses the value of sv_shared_page_base.
*/
SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST,
(sysinit_cfunc_t) amd64_lower_shared_page,
&elf64_freebsd_sysvec);
/*
 * header_supported hook for the LA57 brand: decide whether an image
 * may get the full 5-level user address space.  An explicit
 * per-process LA57 request always wins; otherwise the image is kept
 * at LA48 when it carries the NT_FREEBSD_FCTL_LA48 feature-control
 * bit, when no feature-control note is present, or when the process
 * requested LA48.
 */
static boolean_t
freebsd_brand_info_la57_img_compat(struct image_params *imgp,
    int32_t *osrel __unused, uint32_t *fctl0)
{
	if ((imgp->proc->p_md.md_flags & P_MD_LA57) != 0)
		return (TRUE);
	if (fctl0 == NULL || (*fctl0 & NT_FREEBSD_FCTL_LA48) != 0 ||
	    (imgp->proc->p_md.md_flags & P_MD_LA48) != 0)
		return (FALSE);
	return (TRUE);
}
static Elf64_Brandinfo freebsd_brand_info = {
static Elf64_Brandinfo freebsd_brand_info_la48 = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
.compat_3_brand = "FreeBSD",
.emul_path = NULL,
.interp_path = "/libexec/ld-elf.so.1",
.sysvec = &elf64_freebsd_sysvec,
.sysvec = &elf64_freebsd_sysvec_la48,
.interp_newpath = NULL,
.brand_note = &elf64_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
};
/*
 * Brand for native binaries that may use the full LA57 user address
 * space.  The .header_supported hook decides per image whether this
 * brand claims it or the image falls through to the LA48 brand.
 */
static Elf64_Brandinfo freebsd_brand_info_la57 = {
	.brand = ELFOSABI_FREEBSD,
	.machine = EM_X86_64,
	.compat_3_brand = "FreeBSD",
	.emul_path = NULL,
	.interp_path = "/libexec/ld-elf.so.1",
	.sysvec = &elf64_freebsd_sysvec_la57,	/* LA57 user VA layout */
	.interp_newpath = NULL,
	.brand_note = &elf64_freebsd_brandnote,
	.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
	.header_supported = freebsd_brand_info_la57_img_compat,
};
/*
 * Register the native ELF64 brands.  The LA57 brand must be inserted
 * first so that its header_supported hook can either claim the image
 * or let it fall through to the unconditional LA48 brand.  On LA48
 * hardware only the LA48 brand is registered.
 */
static void
sysinit_register_elf64_brand_entries(void *arg __unused)
{
	/*
	 * _57 must go first so it can either claim the image or hand
	 * it to _48.
	 */
	if (la57)
		elf64_insert_brand_entry(&freebsd_brand_info_la57);
	elf64_insert_brand_entry(&freebsd_brand_info_la48);
}
SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
(sysinit_cfunc_t) elf64_insert_brand_entry,
&freebsd_brand_info);
sysinit_register_elf64_brand_entries, NULL);
static Elf64_Brandinfo freebsd_brand_oinfo = {
.brand = ELFOSABI_FREEBSD,
@ -128,15 +206,14 @@ static Elf64_Brandinfo freebsd_brand_oinfo = {
.compat_3_brand = "FreeBSD",
.emul_path = NULL,
.interp_path = "/usr/libexec/ld-elf.so.1",
.sysvec = &elf64_freebsd_sysvec,
.sysvec = &elf64_freebsd_sysvec_la48,
.interp_newpath = NULL,
.brand_note = &elf64_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t) elf64_insert_brand_entry,
&freebsd_brand_oinfo);
(sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
static Elf64_Brandinfo kfreebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
@ -144,15 +221,14 @@ static Elf64_Brandinfo kfreebsd_brand_info = {
.compat_3_brand = "FreeBSD",
.emul_path = NULL,
.interp_path = "/lib/ld-kfreebsd-x86-64.so.1",
.sysvec = &elf64_freebsd_sysvec,
.sysvec = &elf64_freebsd_sysvec_la48,
.interp_newpath = NULL,
.brand_note = &elf64_kfreebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
};
SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t) elf64_insert_brand_entry,
&kfreebsd_brand_info);
(sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info);
void
elf64_dump_thread(struct thread *td, void *dst, size_t *off)

View File

@ -99,11 +99,10 @@ ASSYM(TDP_KTHREAD, TDP_KTHREAD);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
ASSYM(NPDEPG, NPDEPG);
ASSYM(addr_PTmap, addr_PTmap);
ASSYM(addr_PDmap, addr_PDmap);
ASSYM(addr_PDPmap, addr_PDPmap);
ASSYM(addr_PML4map, addr_PML4map);
ASSYM(addr_PML4pml4e, addr_PML4pml4e);
ASSYM(addr_P4Tmap, addr_P4Tmap);
ASSYM(addr_P4Dmap, addr_P4Dmap);
ASSYM(addr_P5Tmap, addr_P5Tmap);
ASSYM(addr_P5Dmap, addr_P5Dmap);
ASSYM(PDESIZE, sizeof(pd_entry_t));
ASSYM(PTESIZE, sizeof(pt_entry_t));
ASSYM(PAGE_SHIFT, PAGE_SHIFT);

View File

@ -36,13 +36,8 @@
/*
* Compiled KERNBASE location
*/
.globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
.globl kernbase, loc_PTmap, loc_PDmap, loc_PDPmap, dmapbase, dmapend
.set kernbase,KERNBASE
.set loc_PTmap,addr_PTmap
.set loc_PDmap,addr_PDmap
.set loc_PDPmap,addr_PDPmap
.set loc_PML4map,addr_PML4map
.set loc_PML4pml4e,addr_PML4pml4e
.set dmapbase,DMAP_MIN_ADDRESS
.set dmapend,DMAP_MAX_ADDRESS
@ -82,6 +77,62 @@ NON_GPROF_ENTRY(btext)
0: hlt
jmp 0b
/*
 * la57_trampoline(%rdi pml5)
 *
 * Switch the BSP from 4-level to 5-level paging.  CR4.LA57 can only
 * be changed while paging (and thus long mode) is off, so this drops
 * into 32-bit protected mode via a temporary GDT, clears CR0.PG, sets
 * CR4.LA57, installs the PML5 root passed in %rdi, and re-enters long
 * mode.  %rsp and %rbx are preserved in %r11/%r10; a small scratch
 * area after la57_trampoline_end serves as the temporary stack.
 */
NON_GPROF_ENTRY(la57_trampoline)
	movq	%rsp,%r11		/* save caller stack pointer */
	movq	%rbx,%r10		/* save %rbx, clobbered below */
	leaq	la57_trampoline_end(%rip),%rsp
	movq	%cr0,%rdx		/* remember CR0 with PG set */
	lgdtq	la57_trampoline_gdt_desc(%rip)
	pushq	$(2<<3)			/* 32-bit code selector for lretq */
	leaq	l1(%rip),%rax
	leaq	l2(%rip),%rbx
	pushq	%rax
	lretq				/* far return into 32-bit code at l1 */
	.code32
l1:	movl	$(3<<3),%eax		/* load the data selector into %ss */
	movl	%eax,%ss
	movl	%edx,%eax
	andl	$~CR0_PG,%eax		/* paging off: long mode deactivates */
	movl	%eax,%cr0
	movl	%cr4,%eax
	orl	$CR4_LA57,%eax		/* request 5-level paging */
	movl	%eax,%cr4
	movl	%edi,%cr3		/* install the new PML5 root */
	movl	%edx,%cr0		/* paging on again: back to long mode */
	pushl	$(1<<3)			/* 64-bit code selector for lretl */
	pushl	%ebx
	lretl				/* far return into 64-bit code at l2 */
	.code64
l2:	movq	%r11,%rsp		/* restore caller stack and %rbx */
	movq	%r10,%rbx
	retq
	.p2align 4,0
NON_GPROF_ENTRY(la57_trampoline_gdt_desc)
	.word	la57_trampoline_end - la57_trampoline_gdt
	.long	0			/* filled by pmap_bootstrap_la57 */
	.p2align 4,0
NON_GPROF_ENTRY(la57_trampoline_gdt)
	.long	0x00000000		/* null desc */
	.long	0x00000000
	.long	0x00000000		/* 64bit code */
	.long	0x00209800
	.long	0x0000ffff		/* 32bit code */
	.long	0x00cf9b00
	.long	0x0000ffff		/* universal data */
	.long	0x00cf9300
	.dcb.l	16,0			/* scratch: temporary trampoline stack */
NON_GPROF_ENTRY(la57_trampoline_end)
.bss
ALIGN_DATA /* just to be sure */
.globl bootstack

View File

@ -96,7 +96,7 @@ __FBSDID("$FreeBSD$");
#define GiB(v) (v ## ULL << 30)
#define AP_BOOTPT_SZ (PAGE_SIZE * 3)
#define AP_BOOTPT_SZ (PAGE_SIZE * 4)
/* Temporary variables for init_secondary() */
char *doublefault_stack;
@ -104,6 +104,8 @@ char *mce_stack;
char *nmi_stack;
char *dbg_stack;
extern u_int mptramp_la57;
/*
* Local data and functions.
*/
@ -240,6 +242,8 @@ cpu_mp_start(void)
assign_cpu_ids();
mptramp_la57 = la57;
/* Start each Application Processor */
init_ops.start_all_aps();
@ -395,9 +399,9 @@ mp_realloc_pcpu(int cpuid, int domain)
int
native_start_all_aps(void)
{
u_int64_t *pt4, *pt3, *pt2;
u_int64_t *pt5, *pt4, *pt3, *pt2;
u_int32_t mpbioswarmvec;
int apic_id, cpu, domain, i;
int apic_id, cpu, domain, i, xo;
u_char mpbiosreason;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
@ -406,18 +410,38 @@ native_start_all_aps(void)
bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);
/* Locate the page tables, they'll be below the trampoline */
pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
if (la57) {
pt5 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
xo = 1;
} else {
xo = 0;
}
pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables + xo * PAGE_SIZE);
pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
/* Create the initial 1GB replicated page tables */
for (i = 0; i < 512; i++) {
/* Each slot of the level 4 pages points to the same level 3 page */
pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
if (la57) {
pt5[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
PAGE_SIZE);
pt5[i] |= PG_V | PG_RW | PG_U;
}
/*
* Each slot of the level 4 pages points to the same
* level 3 page.
*/
pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
(xo + 1) * PAGE_SIZE);
pt4[i] |= PG_V | PG_RW | PG_U;
/* Each slot of the level 3 pages points to the same level 2 page */
pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
/*
* Each slot of the level 3 pages points to the same
* level 2 page.
*/
pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
((xo + 2) * PAGE_SIZE));
pt3[i] |= PG_V | PG_RW | PG_U;
/* The level 2 page slots are mapped with 2MB pages for 1GB. */

View File

@ -90,10 +90,16 @@ protmode:
mov $bootdata-gdt, %eax
mov %ax, %ds
/* Turn on the PAE bit for when paging is enabled */
/*
* Turn on the PAE bit and optionally the LA57 bit for when paging
* is later enabled.
*/
mov %cr4, %eax
orl $CR4_PAE, %eax
mov %eax, %cr4
cmpb $0, mptramp_la57-mptramp_start(%ebx)
je 1f
orl $CR4_LA57, %eax
1: mov %eax, %cr4
/*
* Enable EFER.LME so that we get long mode when all the prereqs are
@ -132,9 +138,9 @@ protmode:
/*
* At this point paging is enabled, and we are in "compatibility" mode.
* We do another far jump to reload %cs with the 64 bit selector.
* %cr3 points to a 4-level page table page.
* %cr3 points to a 4- or 5-level page table.
* We cannot yet jump all the way to the kernel because we can only
* specify a 32 bit linear address. So, yet another trampoline.
* specify a 32 bit linear address. So, we use yet another trampoline.
*
* The following instruction is:
* ljmp $kernelcode-gdt, $tramp_64-mptramp_start
@ -209,6 +215,11 @@ gdtend:
mptramp_pagetables:
.long 0
/* 5-level paging ? */
.globl mptramp_la57
mptramp_la57:
.long 0
/*
* The pseudo descriptor for lgdt to use.
*/
@ -251,8 +262,12 @@ entry_64:
* Load a real %cr3 that has all the direct map stuff and switches
* off the 1GB replicated mirror. Load a stack pointer and jump
* into AP startup code in C.
*/
*/
cmpl $0, la57
jne 2f
movq KPML4phys, %rax
movq %rax, %cr3
jmp 3f
2: movq KPML5phys, %rax
3: movq %rax, %cr3
movq bootSTK, %rsp
jmp init_secondary

File diff suppressed because it is too large Load Diff

View File

@ -46,6 +46,8 @@ extern int syscall_ret_l1d_flush_mode;
extern vm_paddr_t intel_graphics_stolen_base;
extern vm_paddr_t intel_graphics_stolen_size;
extern int la57;
/*
* The file "conf/ldscript.amd64" defines the symbol "kernphys". Its
* value is the physical address at which the kernel is loaded.

View File

@ -118,6 +118,12 @@
#define PML4SHIFT 39 /* LOG2(NBPML4) */
#define NBPML4 (1UL<<PML4SHIFT)/* bytes/page map lev4 table */
#define PML4MASK (NBPML4-1)
/* Size of the level 5 page-map level-5 table units */
#define NPML5EPG (PAGE_SIZE/(sizeof (pml5_entry_t)))
#define NPML5EPGSHIFT 9 /* LOG2(NPML5EPG) */
#define PML5SHIFT 48 /* LOG2(NBPML5) */
#define NBPML5 (1UL<<PML5SHIFT)/* bytes/page map lev5 table */
#define PML5MASK (NBPML5-1)
#define MAXPAGESIZES 3 /* maximum number of supported page sizes */

View File

@ -166,14 +166,22 @@
* Pte related macros. This is complicated by having to deal with
* the sign extension of the 48th bit.
*/
#define KVADDR(l4, l3, l2, l1) ( \
#define KV4ADDR(l4, l3, l2, l1) ( \
((unsigned long)-1 << 47) | \
((unsigned long)(l4) << PML4SHIFT) | \
((unsigned long)(l3) << PDPSHIFT) | \
((unsigned long)(l2) << PDRSHIFT) | \
((unsigned long)(l1) << PAGE_SHIFT))
#define KV5ADDR(l5, l4, l3, l2, l1) ( \
((unsigned long)-1 << 56) | \
((unsigned long)(l5) << PML5SHIFT) | \
((unsigned long)(l4) << PML4SHIFT) | \
((unsigned long)(l3) << PDPSHIFT) | \
((unsigned long)(l2) << PDRSHIFT) | \
((unsigned long)(l1) << PAGE_SHIFT))
#define UVADDR(l4, l3, l2, l1) ( \
#define UVADDR(l5, l4, l3, l2, l1) ( \
((unsigned long)(l5) << PML5SHIFT) | \
((unsigned long)(l4) << PML4SHIFT) | \
((unsigned long)(l3) << PDPSHIFT) | \
((unsigned long)(l2) << PDRSHIFT) | \
@ -187,9 +195,19 @@
*/
#define NKPML4E 4
#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */
#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
/*
* We use the same numbering of the page table pages for 5-level and
* 4-level paging structures.
*/
#define NUPML5E (NPML5EPG / 2) /* number of userland PML5
pages */
#define NUPML4E (NUPML5E * NPML4EPG) /* number of userland PML4
pages */
#define NUPDPE (NUPML4E * NPDPEPG) /* number of userland PDP
pages */
#define NUPDE (NUPDPE * NPDEPG) /* number of userland PD
entries */
#define NUP4ML4E (NPML4EPG / 2)
/*
* NDMPML4E is the maximum number of PML4 entries that will be
@ -216,7 +234,8 @@
* Or, in other words, KPML4I provides bits 39..47 of KERNBASE,
* and KPDPI provides bits 30..38.)
*/
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
#define PML4PML4I (NPML4EPG / 2) /* Index of recursive pml4 mapping */
#define PML5PML5I (NPML5EPG / 2) /* Index of recursive pml5 mapping */
#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
@ -258,25 +277,34 @@ typedef u_int64_t pd_entry_t;
typedef u_int64_t pt_entry_t;
typedef u_int64_t pdp_entry_t;
typedef u_int64_t pml4_entry_t;
typedef u_int64_t pml5_entry_t;
/*
* Address of current address space page table maps and directories.
*/
#ifdef _KERNEL
#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0))
#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0))
#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
#define PTmap ((pt_entry_t *)(addr_PTmap))
#define PDmap ((pd_entry_t *)(addr_PDmap))
#define PDPmap ((pd_entry_t *)(addr_PDPmap))
#define PML4map ((pd_entry_t *)(addr_PML4map))
#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
/*
 * Recursive page-table map addresses for 4-level (P4*) and 5-level
 * (P5*) paging.  Only the set matching the active paging mode (see
 * la57) is meaningful at runtime.
 */
#define	addr_P4Tmap	(KV4ADDR(PML4PML4I, 0, 0, 0))
#define	addr_P4Dmap	(KV4ADDR(PML4PML4I, PML4PML4I, 0, 0))
#define	addr_P4DPmap	(KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
#define	addr_P4ML4map	(KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
/* Fixed: must use the renamed addr_P4ML4map, addr_PML4map is gone. */
#define	addr_P4ML4pml4e	(addr_P4ML4map + (PML4PML4I * sizeof(pml4_entry_t)))
#define	P4Tmap		((pt_entry_t *)(addr_P4Tmap))
#define	P4Dmap		((pd_entry_t *)(addr_P4Dmap))
#define	addr_P5Tmap	(KV5ADDR(PML5PML5I, 0, 0, 0, 0))
#define	addr_P5Dmap	(KV5ADDR(PML5PML5I, PML5PML5I, 0, 0, 0))
#define	addr_P5DPmap	(KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, 0, 0))
#define	addr_P5ML4map	(KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, 0))
/* Fixed: the five-index form needs KV5ADDR; 4-level KVADDR was renamed. */
#define	addr_P5ML5map \
	(KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I))
#define	addr_P5ML5pml5e	(addr_P5ML5map + (PML5PML5I * sizeof(pml5_entry_t)))
#define	P5Tmap		((pt_entry_t *)(addr_P5Tmap))
#define	P5Dmap		((pd_entry_t *)(addr_P5Dmap))
extern int nkpt; /* Initial number of kernel page tables */
extern u_int64_t KPDPphys; /* physical address of kernel level 3 */
extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
extern u_int64_t KPML5phys; /* physical address of kernel level 5 */
/*
* virtual address to page table entry and
@ -333,8 +361,8 @@ struct pmap_pcids {
*/
struct pmap {
struct mtx pm_mtx;
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
pml4_entry_t *pm_pml4u; /* KVA of user l4 page table */
pml4_entry_t *pm_pmltop; /* KVA of top level page table */
pml4_entry_t *pm_pmltopu; /* KVA of user top page table */
uint64_t pm_cr3;
uint64_t pm_ucr3;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
@ -447,6 +475,7 @@ bool pmap_not_in_di(void);
boolean_t pmap_page_is_mapped(vm_page_t m);
void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
void pmap_pinit_pml4(vm_page_t);
void pmap_pinit_pml5(vm_page_t);
bool pmap_ps_enabled(pmap_t pmap);
void pmap_unmapdev(vm_offset_t, vm_size_t);
void pmap_invalidate_page(pmap_t, vm_offset_t);
@ -502,6 +531,13 @@ pmap_pml4e_index(vm_offset_t va)
return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
}
/*
 * Return the PML5 (level 5 page table) index for va, i.e. bits
 * 48..56 of the virtual address.
 */
static __inline vm_pindex_t
pmap_pml5e_index(vm_offset_t va)
{
	return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1));
}
#endif /* !LOCORE */
#endif /* !_MACHINE_PMAP_H_ */

View File

@ -84,6 +84,8 @@ struct mdproc {
};
#define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */
#define P_MD_LA48 0x00000002 /* Request LA48 after exec */
#define P_MD_LA57 0x00000004 /* Request LA57 after exec */
#define KINFO_PROC_SIZE 1088
#define KINFO_PROC32_SIZE 768

View File

@ -169,25 +169,32 @@
* 0xffffffff80000000 KERNBASE
*/
#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0)
#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \
#define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0)
#define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \
NPDPEPG-1, NPDEPG-1, NPTEPG-1)
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
#define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0)
#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
#define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0)
#define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0)
#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)
#define KERNBASE KV4ADDR(KPML4I, KPDPI, 0, 0)
#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
#define UPT_MIN_ADDRESS KVADDR(PML4PML4I, 0, 0, 0)
#define UPT_MAX_ADDRESS KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
#define UPT_MIN_ADDRESS KV4ADDR(PML4PML4I, 0, 0, 0)
#define VM_MAXUSER_ADDRESS UVADDR(NUPML4E, 0, 0, 0)
#define VM_MAXUSER_ADDRESS_LA57 UVADDR(NUPML5E, 0, 0, 0, 0)
#define VM_MAXUSER_ADDRESS_LA48 UVADDR(0, NUP4ML4E, 0, 0, 0)
#define VM_MAXUSER_ADDRESS VM_MAXUSER_ADDRESS_LA57
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#define USRSTACK SHAREDPAGE
#define SHAREDPAGE_LA57 (VM_MAXUSER_ADDRESS_LA57 - PAGE_SIZE)
#define SHAREDPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - PAGE_SIZE)
#define USRSTACK_LA57 SHAREDPAGE_LA57
#define USRSTACK_LA48 SHAREDPAGE_LA48
#define USRSTACK USRSTACK_LA48
#define PS_STRINGS_LA57 (USRSTACK_LA57 - sizeof(struct ps_strings))
#define PS_STRINGS_LA48 (USRSTACK_LA48 - sizeof(struct ps_strings))
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
#define VM_MIN_ADDRESS (0)

View File

@ -739,9 +739,9 @@ struct sysentvec elf_linux_sysvec = {
.sv_imgact_try = linux_exec_imgact_try,
.sv_minsigstksz = LINUX_MINSIGSTKSZ,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS,
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
.sv_usrstack = USRSTACK_LA48,
.sv_psstrings = PS_STRINGS_LA48,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_auxargs = linux_copyout_auxargs,
.sv_copyout_strings = linux_copyout_strings,
@ -752,7 +752,7 @@ struct sysentvec elf_linux_sysvec = {
.sv_set_syscall_retval = linux_set_syscall_retval,
.sv_fetch_syscall_args = linux_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_shared_page_base = SHAREDPAGE,
.sv_shared_page_base = SHAREDPAGE_LA48,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = linux_schedtail,
.sv_thread_detach = linux_thread_detach,

View File

@ -560,7 +560,7 @@ svm_vminit(struct vm *vm, pmap_t pmap)
panic("contigmalloc of SVM IO bitmap failed");
svm_sc->vm = vm;
svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pmltop);
/*
* Intercept read and write accesses to all MSRs.

View File

@ -1030,7 +1030,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
}
vmx->vm = vm;
vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4));
vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pmltop));
/*
* Clean up EPTP-tagged guest physical and combined mappings

View File

@ -43,6 +43,7 @@
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <vm/pmap.h>
@ -131,7 +132,7 @@ dtrace_invop_uninit(void)
/*
 * Report the toxic (unsafe to access) virtual address ranges to the
 * DTrace framework: everything from 0 up to the start of the
 * recursive page-table map, whose location depends on whether 4- or
 * 5-level paging is active.
 */
void
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
{
	uintptr_t ptmap_base;

	if (la57)
		ptmap_base = (uintptr_t)addr_P5Tmap;
	else
		ptmap_base = (uintptr_t)addr_P4Tmap;
	(*func)(0, ptmap_base);
}
void