amd64 pmap: LA57 AKA 5-level paging
Since LA57 was moved to the main SDM document with revision 072, it seems that we should have support for it, and silicon is coming. This patch makes pmap support both LA48 and LA57 hardware. The selection of the page table level is done at startup; the kernel always receives control from the loader with 4-level paging. It is not clear how the UEFI spec will adopt LA57; for instance, it could sometimes hand out control in LA57 mode. Switching from LA48 to LA57 requires turning off long mode, requesting LA57 in CR4, and then re-entering long mode. This is somewhat delicate and is done in pmap_bootstrap_la57(). AP startup in LA57 mode is much easier: we only need to toggle a bit in CR4 and load the right value into CR3. I decided not to change the kernel map for now. A single PML5 entry is created that points to the existing kernel_pml4 (KML4Phys) page, plus a pml5 entry to create our recursive mapping for vtopte()/vtopde(). This decision is motivated by the fact that we cannot overcommit for KVA, so the large space there is unusable until machines start providing wider physical memory addressing. Another reason is that I do not want to break our fragile autotuning, so the KVA expansion is not included in this first step. A nice side effect is that minidumps are compatible. On the other hand, a (very) large address space is definitely immediately useful for some userspace applications. For userspace, numbering of pte entries (or page table pages) is always done for 5-level structures even if we operate in 4-level mode. The pmap_is_la57() function is added to report the mode of the specified pmap; this is done not to allow simultaneous 4-/5-level paging (which is not allowed by hardware), but to accommodate EPT, which has separate level control and in principle might not allow 5-level EPT even though x86 paging supports it. Anyway, it does not seem critical to have 5-level EPT support now. 
Tested by: pho (LA48 hardware) Reviewed by: alc Sponsored by: The FreeBSD Foundation Differential revision: https://reviews.freebsd.org/D25273
This commit is contained in:
parent
4ba405dcdb
commit
9ce875d9b5
@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <machine/fpu.h>
|
||||
#include <machine/md_var.h>
|
||||
|
||||
struct sysentvec elf64_freebsd_sysvec = {
|
||||
struct sysentvec elf64_freebsd_sysvec_la48 = {
|
||||
.sv_size = SYS_MAXSYSCALL,
|
||||
.sv_table = sysent,
|
||||
.sv_errsize = 0,
|
||||
@ -64,9 +64,9 @@ struct sysentvec elf64_freebsd_sysvec = {
|
||||
.sv_imgact_try = NULL,
|
||||
.sv_minsigstksz = MINSIGSTKSZ,
|
||||
.sv_minuser = VM_MIN_ADDRESS,
|
||||
.sv_maxuser = VM_MAXUSER_ADDRESS,
|
||||
.sv_usrstack = USRSTACK,
|
||||
.sv_psstrings = PS_STRINGS,
|
||||
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
|
||||
.sv_usrstack = USRSTACK_LA48,
|
||||
.sv_psstrings = PS_STRINGS_LA48,
|
||||
.sv_stackprot = VM_PROT_ALL,
|
||||
.sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs),
|
||||
.sv_copyout_strings = exec_copyout_strings,
|
||||
@ -78,14 +78,64 @@ struct sysentvec elf64_freebsd_sysvec = {
|
||||
.sv_set_syscall_retval = cpu_set_syscall_retval,
|
||||
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
|
||||
.sv_syscallnames = syscallnames,
|
||||
.sv_shared_page_base = SHAREDPAGE,
|
||||
.sv_shared_page_base = SHAREDPAGE_LA48,
|
||||
.sv_shared_page_len = PAGE_SIZE,
|
||||
.sv_schedtail = NULL,
|
||||
.sv_thread_detach = NULL,
|
||||
.sv_trap = NULL,
|
||||
.sv_stackgap = elf64_stackgap,
|
||||
};
|
||||
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
|
||||
|
||||
struct sysentvec elf64_freebsd_sysvec_la57 = {
|
||||
.sv_size = SYS_MAXSYSCALL,
|
||||
.sv_table = sysent,
|
||||
.sv_errsize = 0,
|
||||
.sv_errtbl = NULL,
|
||||
.sv_transtrap = NULL,
|
||||
.sv_fixup = __elfN(freebsd_fixup),
|
||||
.sv_sendsig = sendsig,
|
||||
.sv_sigcode = sigcode,
|
||||
.sv_szsigcode = &szsigcode,
|
||||
.sv_name = "FreeBSD ELF64",
|
||||
.sv_coredump = __elfN(coredump),
|
||||
.sv_imgact_try = NULL,
|
||||
.sv_minsigstksz = MINSIGSTKSZ,
|
||||
.sv_minuser = VM_MIN_ADDRESS,
|
||||
.sv_maxuser = VM_MAXUSER_ADDRESS_LA57,
|
||||
.sv_usrstack = USRSTACK_LA57,
|
||||
.sv_psstrings = PS_STRINGS_LA57,
|
||||
.sv_stackprot = VM_PROT_ALL,
|
||||
.sv_copyout_auxargs = __elfN(freebsd_copyout_auxargs),
|
||||
.sv_copyout_strings = exec_copyout_strings,
|
||||
.sv_setregs = exec_setregs,
|
||||
.sv_fixlimit = NULL,
|
||||
.sv_maxssiz = NULL,
|
||||
.sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
|
||||
SV_TIMEKEEP,
|
||||
.sv_set_syscall_retval = cpu_set_syscall_retval,
|
||||
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
|
||||
.sv_syscallnames = syscallnames,
|
||||
.sv_shared_page_base = SHAREDPAGE_LA57,
|
||||
.sv_shared_page_len = PAGE_SIZE,
|
||||
.sv_schedtail = NULL,
|
||||
.sv_thread_detach = NULL,
|
||||
.sv_trap = NULL,
|
||||
.sv_stackgap = elf64_stackgap,
|
||||
};
|
||||
|
||||
/*
 * SYSINIT hook (registered at SI_SUB_EXEC, SI_ORDER_ANY) that prepares the
 * native ELF64 system entry vectors.
 *
 * The shared page of the LA48 vector is lowered first.  When the global
 * la57 flag is set, the LA57 vector is initialized as the primary one and
 * the LA48 vector is attached to it through exec_sysvec_init_secondary();
 * otherwise only the LA48 vector is initialized.
 *
 * The arg parameter is not used.
 */
static void
|
||||
amd64_init_sysvecs(void *arg)
|
||||
{
|
||||
amd64_lower_shared_page(&elf64_freebsd_sysvec_la48);
|
||||
if (la57) {
|
||||
exec_sysvec_init(&elf64_freebsd_sysvec_la57);
|
||||
exec_sysvec_init_secondary(&elf64_freebsd_sysvec_la57,
|
||||
&elf64_freebsd_sysvec_la48);
|
||||
} else {
|
||||
exec_sysvec_init(&elf64_freebsd_sysvec_la48);
|
||||
}
|
||||
}
|
||||
SYSINIT(elf64_sysvec, SI_SUB_EXEC, SI_ORDER_ANY, amd64_init_sysvecs, NULL);
|
||||
|
||||
void
|
||||
amd64_lower_shared_page(struct sysentvec *sv)
|
||||
@ -98,29 +148,57 @@ amd64_lower_shared_page(struct sysentvec *sv)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter
|
||||
* uses the value of sv_shared_page_base.
|
||||
*/
|
||||
SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST,
|
||||
(sysinit_cfunc_t) amd64_lower_shared_page,
|
||||
&elf64_freebsd_sysvec);
|
||||
/*
 * header_supported callback of the LA57 brand entry: decides whether the
 * image being executed is accepted by the LA57 brand.
 *
 * Checks are applied in this order:
 *  - the process has P_MD_LA57 set: accept (TRUE);
 *  - no feature-control word is available (fctl0 == NULL) or the image
 *    sets NT_FREEBSD_FCTL_LA48: reject (FALSE);
 *  - the process has P_MD_LA48 set: reject (FALSE);
 *  - otherwise: accept (TRUE).
 *
 * The osrel argument is not used.
 */
static boolean_t
|
||||
freebsd_brand_info_la57_img_compat(struct image_params *imgp,
|
||||
int32_t *osrel __unused, uint32_t *fctl0)
|
||||
{
|
||||
if ((imgp->proc->p_md.md_flags & P_MD_LA57) != 0)
|
||||
return (TRUE);
|
||||
if (fctl0 == NULL || (*fctl0 & NT_FREEBSD_FCTL_LA48) != 0)
|
||||
return (FALSE);
|
||||
if ((imgp->proc->p_md.md_flags & P_MD_LA48) != 0)
|
||||
return (FALSE);
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
static Elf64_Brandinfo freebsd_brand_info = {
|
||||
static Elf64_Brandinfo freebsd_brand_info_la48 = {
|
||||
.brand = ELFOSABI_FREEBSD,
|
||||
.machine = EM_X86_64,
|
||||
.compat_3_brand = "FreeBSD",
|
||||
.emul_path = NULL,
|
||||
.interp_path = "/libexec/ld-elf.so.1",
|
||||
.sysvec = &elf64_freebsd_sysvec,
|
||||
.sysvec = &elf64_freebsd_sysvec_la48,
|
||||
.interp_newpath = NULL,
|
||||
.brand_note = &elf64_freebsd_brandnote,
|
||||
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
|
||||
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
|
||||
};
|
||||
|
||||
static Elf64_Brandinfo freebsd_brand_info_la57 = {
|
||||
.brand = ELFOSABI_FREEBSD,
|
||||
.machine = EM_X86_64,
|
||||
.compat_3_brand = "FreeBSD",
|
||||
.emul_path = NULL,
|
||||
.interp_path = "/libexec/ld-elf.so.1",
|
||||
.sysvec = &elf64_freebsd_sysvec_la57,
|
||||
.interp_newpath = NULL,
|
||||
.brand_note = &elf64_freebsd_brandnote,
|
||||
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
|
||||
.header_supported = freebsd_brand_info_la57_img_compat,
|
||||
};
|
||||
|
||||
static void
|
||||
sysinit_register_elf64_brand_entries(void *arg __unused)
|
||||
{
|
||||
/*
|
||||
* _57 must go first so it can either claim the image or hand
|
||||
* it to _48.
|
||||
*/
|
||||
if (la57)
|
||||
elf64_insert_brand_entry(&freebsd_brand_info_la57);
|
||||
elf64_insert_brand_entry(&freebsd_brand_info_la48);
|
||||
}
|
||||
SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
|
||||
(sysinit_cfunc_t) elf64_insert_brand_entry,
|
||||
&freebsd_brand_info);
|
||||
sysinit_register_elf64_brand_entries, NULL);
|
||||
|
||||
static Elf64_Brandinfo freebsd_brand_oinfo = {
|
||||
.brand = ELFOSABI_FREEBSD,
|
||||
@ -128,15 +206,14 @@ static Elf64_Brandinfo freebsd_brand_oinfo = {
|
||||
.compat_3_brand = "FreeBSD",
|
||||
.emul_path = NULL,
|
||||
.interp_path = "/usr/libexec/ld-elf.so.1",
|
||||
.sysvec = &elf64_freebsd_sysvec,
|
||||
.sysvec = &elf64_freebsd_sysvec_la48,
|
||||
.interp_newpath = NULL,
|
||||
.brand_note = &elf64_freebsd_brandnote,
|
||||
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
|
||||
};
|
||||
|
||||
SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
|
||||
(sysinit_cfunc_t) elf64_insert_brand_entry,
|
||||
&freebsd_brand_oinfo);
|
||||
(sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
|
||||
|
||||
static Elf64_Brandinfo kfreebsd_brand_info = {
|
||||
.brand = ELFOSABI_FREEBSD,
|
||||
@ -144,15 +221,14 @@ static Elf64_Brandinfo kfreebsd_brand_info = {
|
||||
.compat_3_brand = "FreeBSD",
|
||||
.emul_path = NULL,
|
||||
.interp_path = "/lib/ld-kfreebsd-x86-64.so.1",
|
||||
.sysvec = &elf64_freebsd_sysvec,
|
||||
.sysvec = &elf64_freebsd_sysvec_la48,
|
||||
.interp_newpath = NULL,
|
||||
.brand_note = &elf64_kfreebsd_brandnote,
|
||||
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
|
||||
};
|
||||
|
||||
SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
|
||||
(sysinit_cfunc_t) elf64_insert_brand_entry,
|
||||
&kfreebsd_brand_info);
|
||||
(sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info);
|
||||
|
||||
void
|
||||
elf64_dump_thread(struct thread *td, void *dst, size_t *off)
|
||||
|
@ -99,11 +99,10 @@ ASSYM(TDP_KTHREAD, TDP_KTHREAD);
|
||||
ASSYM(PAGE_SIZE, PAGE_SIZE);
|
||||
ASSYM(NPTEPG, NPTEPG);
|
||||
ASSYM(NPDEPG, NPDEPG);
|
||||
ASSYM(addr_PTmap, addr_PTmap);
|
||||
ASSYM(addr_PDmap, addr_PDmap);
|
||||
ASSYM(addr_PDPmap, addr_PDPmap);
|
||||
ASSYM(addr_PML4map, addr_PML4map);
|
||||
ASSYM(addr_PML4pml4e, addr_PML4pml4e);
|
||||
ASSYM(addr_P4Tmap, addr_P4Tmap);
|
||||
ASSYM(addr_P4Dmap, addr_P4Dmap);
|
||||
ASSYM(addr_P5Tmap, addr_P5Tmap);
|
||||
ASSYM(addr_P5Dmap, addr_P5Dmap);
|
||||
ASSYM(PDESIZE, sizeof(pd_entry_t));
|
||||
ASSYM(PTESIZE, sizeof(pt_entry_t));
|
||||
ASSYM(PAGE_SHIFT, PAGE_SHIFT);
|
||||
|
@ -36,13 +36,8 @@
|
||||
/*
|
||||
* Compiled KERNBASE location
|
||||
*/
|
||||
.globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
|
||||
.globl kernbase, loc_PTmap, loc_PDmap, loc_PDPmap, dmapbase, dmapend
|
||||
.set kernbase,KERNBASE
|
||||
.set loc_PTmap,addr_PTmap
|
||||
.set loc_PDmap,addr_PDmap
|
||||
.set loc_PDPmap,addr_PDPmap
|
||||
.set loc_PML4map,addr_PML4map
|
||||
.set loc_PML4pml4e,addr_PML4pml4e
|
||||
.set dmapbase,DMAP_MIN_ADDRESS
|
||||
.set dmapend,DMAP_MAX_ADDRESS
|
||||
|
||||
@ -82,6 +77,62 @@ NON_GPROF_ENTRY(btext)
|
||||
0: hlt
|
||||
jmp 0b
|
||||
|
||||
/* la57_trampoline(%rdi pml5) */
|
||||
NON_GPROF_ENTRY(la57_trampoline)
|
||||
movq %rsp,%r11
|
||||
movq %rbx,%r10
|
||||
leaq la57_trampoline_end(%rip),%rsp
|
||||
|
||||
movq %cr0,%rdx
|
||||
lgdtq la57_trampoline_gdt_desc(%rip)
|
||||
|
||||
pushq $(2<<3)
|
||||
leaq l1(%rip),%rax
|
||||
leaq l2(%rip),%rbx
|
||||
|
||||
pushq %rax
|
||||
lretq
|
||||
.code32
|
||||
|
||||
l1: movl $(3<<3),%eax
|
||||
movl %eax,%ss
|
||||
|
||||
movl %edx,%eax
|
||||
andl $~CR0_PG,%eax
|
||||
movl %eax,%cr0
|
||||
|
||||
movl %cr4,%eax
|
||||
orl $CR4_LA57,%eax
|
||||
movl %eax,%cr4
|
||||
|
||||
movl %edi,%cr3
|
||||
movl %edx,%cr0
|
||||
|
||||
pushl $(1<<3)
|
||||
pushl %ebx
|
||||
lretl
|
||||
.code64
|
||||
|
||||
l2: movq %r11,%rsp
|
||||
movq %r10,%rbx
|
||||
retq
|
||||
.p2align 4,0
|
||||
NON_GPROF_ENTRY(la57_trampoline_gdt_desc)
|
||||
.word la57_trampoline_end - la57_trampoline_gdt
|
||||
.long 0 /* filled by pmap_bootstrap_la57 */
|
||||
.p2align 4,0
|
||||
NON_GPROF_ENTRY(la57_trampoline_gdt)
|
||||
.long 0x00000000 /* null desc */
|
||||
.long 0x00000000
|
||||
.long 0x00000000 /* 64bit code */
|
||||
.long 0x00209800
|
||||
.long 0x0000ffff /* 32bit code */
|
||||
.long 0x00cf9b00
|
||||
.long 0x0000ffff /* universal data */
|
||||
.long 0x00cf9300
|
||||
.dcb.l 16,0
|
||||
NON_GPROF_ENTRY(la57_trampoline_end)
|
||||
|
||||
.bss
|
||||
ALIGN_DATA /* just to be sure */
|
||||
.globl bootstack
|
||||
|
@ -96,7 +96,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#define GiB(v) (v ## ULL << 30)
|
||||
|
||||
#define AP_BOOTPT_SZ (PAGE_SIZE * 3)
|
||||
#define AP_BOOTPT_SZ (PAGE_SIZE * 4)
|
||||
|
||||
/* Temporary variables for init_secondary() */
|
||||
char *doublefault_stack;
|
||||
@ -104,6 +104,8 @@ char *mce_stack;
|
||||
char *nmi_stack;
|
||||
char *dbg_stack;
|
||||
|
||||
extern u_int mptramp_la57;
|
||||
|
||||
/*
|
||||
* Local data and functions.
|
||||
*/
|
||||
@ -240,6 +242,8 @@ cpu_mp_start(void)
|
||||
|
||||
assign_cpu_ids();
|
||||
|
||||
mptramp_la57 = la57;
|
||||
|
||||
/* Start each Application Processor */
|
||||
init_ops.start_all_aps();
|
||||
|
||||
@ -395,9 +399,9 @@ mp_realloc_pcpu(int cpuid, int domain)
|
||||
int
|
||||
native_start_all_aps(void)
|
||||
{
|
||||
u_int64_t *pt4, *pt3, *pt2;
|
||||
u_int64_t *pt5, *pt4, *pt3, *pt2;
|
||||
u_int32_t mpbioswarmvec;
|
||||
int apic_id, cpu, domain, i;
|
||||
int apic_id, cpu, domain, i, xo;
|
||||
u_char mpbiosreason;
|
||||
|
||||
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
|
||||
@ -406,18 +410,38 @@ native_start_all_aps(void)
|
||||
bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);
|
||||
|
||||
/* Locate the page tables, they'll be below the trampoline */
|
||||
pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
|
||||
if (la57) {
|
||||
pt5 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
|
||||
xo = 1;
|
||||
} else {
|
||||
xo = 0;
|
||||
}
|
||||
pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables + xo * PAGE_SIZE);
|
||||
pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
|
||||
pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
|
||||
|
||||
/* Create the initial 1GB replicated page tables */
|
||||
for (i = 0; i < 512; i++) {
|
||||
/* Each slot of the level 4 pages points to the same level 3 page */
|
||||
pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE);
|
||||
if (la57) {
|
||||
pt5[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
|
||||
PAGE_SIZE);
|
||||
pt5[i] |= PG_V | PG_RW | PG_U;
|
||||
}
|
||||
|
||||
/*
|
||||
* Each slot of the level 4 pages points to the same
|
||||
* level 3 page.
|
||||
*/
|
||||
pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
|
||||
(xo + 1) * PAGE_SIZE);
|
||||
pt4[i] |= PG_V | PG_RW | PG_U;
|
||||
|
||||
/* Each slot of the level 3 pages points to the same level 2 page */
|
||||
pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE));
|
||||
/*
|
||||
* Each slot of the level 3 pages points to the same
|
||||
* level 2 page.
|
||||
*/
|
||||
pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
|
||||
((xo + 2) * PAGE_SIZE));
|
||||
pt3[i] |= PG_V | PG_RW | PG_U;
|
||||
|
||||
/* The level 2 page slots are mapped with 2MB pages for 1GB. */
|
||||
|
@ -90,10 +90,16 @@ protmode:
|
||||
mov $bootdata-gdt, %eax
|
||||
mov %ax, %ds
|
||||
|
||||
/* Turn on the PAE bit for when paging is enabled */
|
||||
/*
|
||||
* Turn on the PAE bit and optionally the LA57 bit for when paging
|
||||
* is later enabled.
|
||||
*/
|
||||
mov %cr4, %eax
|
||||
orl $CR4_PAE, %eax
|
||||
mov %eax, %cr4
|
||||
cmpb $0, mptramp_la57-mptramp_start(%ebx)
|
||||
je 1f
|
||||
orl $CR4_LA57, %eax
|
||||
1: mov %eax, %cr4
|
||||
|
||||
/*
|
||||
* Enable EFER.LME so that we get long mode when all the prereqs are
|
||||
@ -132,9 +138,9 @@ protmode:
|
||||
/*
|
||||
* At this point paging is enabled, and we are in "compatibility" mode.
|
||||
* We do another far jump to reload %cs with the 64 bit selector.
|
||||
* %cr3 points to a 4-level page table page.
|
||||
* %cr3 points to a 4- or 5-level page table.
|
||||
* We cannot yet jump all the way to the kernel because we can only
|
||||
* specify a 32 bit linear address. So, yet another trampoline.
|
||||
* specify a 32 bit linear address. So, we use yet another trampoline.
|
||||
*
|
||||
* The following instruction is:
|
||||
* ljmp $kernelcode-gdt, $tramp_64-mptramp_start
|
||||
@ -209,6 +215,11 @@ gdtend:
|
||||
mptramp_pagetables:
|
||||
.long 0
|
||||
|
||||
/* 5-level paging ? */
|
||||
.globl mptramp_la57
|
||||
mptramp_la57:
|
||||
.long 0
|
||||
|
||||
/*
|
||||
* The pseudo descriptor for lgdt to use.
|
||||
*/
|
||||
@ -251,8 +262,12 @@ entry_64:
|
||||
* Load a real %cr3 that has all the direct map stuff and switches
|
||||
* off the 1GB replicated mirror. Load a stack pointer and jump
|
||||
* into AP startup code in C.
|
||||
*/
|
||||
*/
|
||||
cmpl $0, la57
|
||||
jne 2f
|
||||
movq KPML4phys, %rax
|
||||
movq %rax, %cr3
|
||||
jmp 3f
|
||||
2: movq KPML5phys, %rax
|
||||
3: movq %rax, %cr3
|
||||
movq bootSTK, %rsp
|
||||
jmp init_secondary
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -46,6 +46,8 @@ extern int syscall_ret_l1d_flush_mode;
|
||||
extern vm_paddr_t intel_graphics_stolen_base;
|
||||
extern vm_paddr_t intel_graphics_stolen_size;
|
||||
|
||||
extern int la57;
|
||||
|
||||
/*
|
||||
* The file "conf/ldscript.amd64" defines the symbol "kernphys". Its
|
||||
* value is the physical address at which the kernel is loaded.
|
||||
|
@ -118,6 +118,12 @@
|
||||
#define PML4SHIFT 39 /* LOG2(NBPML4) */
|
||||
#define NBPML4 (1UL<<PML4SHIFT)/* bytes/page map lev4 table */
|
||||
#define PML4MASK (NBPML4-1)
|
||||
/* Size of the level 5 page-map level-5 table units */
|
||||
#define NPML5EPG (PAGE_SIZE/(sizeof (pml5_entry_t)))
|
||||
#define NPML5EPGSHIFT 9 /* LOG2(NPML5EPG) */
|
||||
#define PML5SHIFT 48 /* LOG2(NBPML5) */
|
||||
#define NBPML5 (1UL<<PML5SHIFT)/* bytes/page map lev5 table */
|
||||
#define PML5MASK (NBPML5-1)
|
||||
|
||||
#define MAXPAGESIZES 3 /* maximum number of supported page sizes */
|
||||
|
||||
|
@ -166,14 +166,22 @@
|
||||
* Pte related macros. This is complicated by having to deal with
|
||||
* the sign extension of the 48th bit.
|
||||
*/
|
||||
#define KVADDR(l4, l3, l2, l1) ( \
|
||||
#define KV4ADDR(l4, l3, l2, l1) ( \
|
||||
((unsigned long)-1 << 47) | \
|
||||
((unsigned long)(l4) << PML4SHIFT) | \
|
||||
((unsigned long)(l3) << PDPSHIFT) | \
|
||||
((unsigned long)(l2) << PDRSHIFT) | \
|
||||
((unsigned long)(l1) << PAGE_SHIFT))
|
||||
#define KV5ADDR(l5, l4, l3, l2, l1) ( \
|
||||
((unsigned long)-1 << 56) | \
|
||||
((unsigned long)(l5) << PML5SHIFT) | \
|
||||
((unsigned long)(l4) << PML4SHIFT) | \
|
||||
((unsigned long)(l3) << PDPSHIFT) | \
|
||||
((unsigned long)(l2) << PDRSHIFT) | \
|
||||
((unsigned long)(l1) << PAGE_SHIFT))
|
||||
|
||||
#define UVADDR(l4, l3, l2, l1) ( \
|
||||
#define UVADDR(l5, l4, l3, l2, l1) ( \
|
||||
((unsigned long)(l5) << PML5SHIFT) | \
|
||||
((unsigned long)(l4) << PML4SHIFT) | \
|
||||
((unsigned long)(l3) << PDPSHIFT) | \
|
||||
((unsigned long)(l2) << PDRSHIFT) | \
|
||||
@ -187,9 +195,19 @@
|
||||
*/
|
||||
#define NKPML4E 4
|
||||
|
||||
#define NUPML4E (NPML4EPG/2) /* number of userland PML4 pages */
|
||||
#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
|
||||
#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
|
||||
/*
|
||||
* We use the same numbering of the page table pages for 5-level and
|
||||
* 4-level paging structures.
|
||||
*/
|
||||
#define NUPML5E (NPML5EPG / 2) /* number of userland PML5
|
||||
pages */
|
||||
#define NUPML4E (NUPML5E * NPML4EPG) /* number of userland PML4
|
||||
pages */
|
||||
#define NUPDPE (NUPML4E * NPDPEPG) /* number of userland PDP
|
||||
pages */
|
||||
#define NUPDE (NUPDPE * NPDEPG) /* number of userland PD
|
||||
entries */
|
||||
#define NUP4ML4E (NPML4EPG / 2)
|
||||
|
||||
/*
|
||||
* NDMPML4E is the maximum number of PML4 entries that will be
|
||||
@ -216,7 +234,8 @@
|
||||
* Or, in other words, KPML4I provides bits 39..47 of KERNBASE,
|
||||
* and KPDPI provides bits 30..38.)
|
||||
*/
|
||||
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
|
||||
#define PML4PML4I (NPML4EPG / 2) /* Index of recursive pml4 mapping */
|
||||
#define PML5PML5I (NPML5EPG / 2) /* Index of recursive pml5 mapping */
|
||||
|
||||
#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
|
||||
#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
|
||||
@ -258,25 +277,34 @@ typedef u_int64_t pd_entry_t;
|
||||
typedef u_int64_t pt_entry_t;
|
||||
typedef u_int64_t pdp_entry_t;
|
||||
typedef u_int64_t pml4_entry_t;
|
||||
typedef u_int64_t pml5_entry_t;
|
||||
|
||||
/*
|
||||
* Address of current address space page table maps and directories.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
#define addr_PTmap (KVADDR(PML4PML4I, 0, 0, 0))
|
||||
#define addr_PDmap (KVADDR(PML4PML4I, PML4PML4I, 0, 0))
|
||||
#define addr_PDPmap (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
|
||||
#define addr_PML4map (KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
|
||||
#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
|
||||
#define PTmap ((pt_entry_t *)(addr_PTmap))
|
||||
#define PDmap ((pd_entry_t *)(addr_PDmap))
|
||||
#define PDPmap ((pd_entry_t *)(addr_PDPmap))
|
||||
#define PML4map ((pd_entry_t *)(addr_PML4map))
|
||||
#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
|
||||
#define addr_P4Tmap (KV4ADDR(PML4PML4I, 0, 0, 0))
|
||||
#define addr_P4Dmap (KV4ADDR(PML4PML4I, PML4PML4I, 0, 0))
|
||||
#define addr_P4DPmap (KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
|
||||
#define addr_P4ML4map (KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
|
||||
/*
 * Address of the recursive PML4 slot (index PML4PML4I) inside the 4-level
 * recursive map.  It must be based on addr_P4ML4map: the former
 * addr_PML4map name no longer exists after the P4/P5 rename.
 */
#define	addr_P4ML4pml4e	(addr_P4ML4map + (PML4PML4I * sizeof(pml4_entry_t)))
|
||||
#define P4Tmap ((pt_entry_t *)(addr_P4Tmap))
|
||||
#define P4Dmap ((pd_entry_t *)(addr_P4Dmap))
|
||||
|
||||
#define addr_P5Tmap (KV5ADDR(PML5PML5I, 0, 0, 0, 0))
|
||||
#define addr_P5Dmap (KV5ADDR(PML5PML5I, PML5PML5I, 0, 0, 0))
|
||||
#define addr_P5DPmap (KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, 0, 0))
|
||||
#define addr_P5ML4map (KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, 0))
|
||||
/*
 * Address of the PML5 page itself as seen through the 5-level recursive
 * map (recursive index PML5PML5I at every level).  This needs the
 * five-argument KV5ADDR constructor; KVADDR was renamed to KV4ADDR and
 * takes only four indices.
 */
#define	addr_P5ML5map	\
	(KV5ADDR(PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I, PML5PML5I))
|
||||
#define addr_P5ML5pml5e (addr_P5ML5map + (PML5PML5I * sizeof(pml5_entry_t)))
|
||||
#define P5Tmap ((pt_entry_t *)(addr_P5Tmap))
|
||||
#define P5Dmap ((pd_entry_t *)(addr_P5Dmap))
|
||||
|
||||
extern int nkpt; /* Initial number of kernel page tables */
|
||||
extern u_int64_t KPDPphys; /* physical address of kernel level 3 */
|
||||
extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
|
||||
extern u_int64_t KPML5phys; /* physical address of kernel level 5 */
|
||||
|
||||
/*
|
||||
* virtual address to page table entry and
|
||||
@ -333,8 +361,8 @@ struct pmap_pcids {
|
||||
*/
|
||||
struct pmap {
|
||||
struct mtx pm_mtx;
|
||||
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
|
||||
pml4_entry_t *pm_pml4u; /* KVA of user l4 page table */
|
||||
pml4_entry_t *pm_pmltop; /* KVA of top level page table */
|
||||
pml4_entry_t *pm_pmltopu; /* KVA of user top page table */
|
||||
uint64_t pm_cr3;
|
||||
uint64_t pm_ucr3;
|
||||
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
|
||||
@ -447,6 +475,7 @@ bool pmap_not_in_di(void);
|
||||
boolean_t pmap_page_is_mapped(vm_page_t m);
|
||||
void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
|
||||
void pmap_pinit_pml4(vm_page_t);
|
||||
void pmap_pinit_pml5(vm_page_t);
|
||||
bool pmap_ps_enabled(pmap_t pmap);
|
||||
void pmap_unmapdev(vm_offset_t, vm_size_t);
|
||||
void pmap_invalidate_page(pmap_t, vm_offset_t);
|
||||
@ -502,6 +531,13 @@ pmap_pml4e_index(vm_offset_t va)
|
||||
return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
|
||||
}
|
||||
|
||||
/*
 * Return the index of the PML5 entry that covers virtual address va:
 * va is shifted right by PML5SHIFT and masked to NPML5EPGSHIFT bits
 * (bits 48..56 with the values defined in this header).
 */
static __inline vm_pindex_t
|
||||
pmap_pml5e_index(vm_offset_t va)
|
||||
{
|
||||
|
||||
return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1));
|
||||
}
|
||||
|
||||
#endif /* !LOCORE */
|
||||
|
||||
#endif /* !_MACHINE_PMAP_H_ */
|
||||
|
@ -84,6 +84,8 @@ struct mdproc {
|
||||
};
|
||||
|
||||
#define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */
|
||||
#define P_MD_LA48 0x00000002 /* Request LA48 after exec */
|
||||
#define P_MD_LA57 0x00000004 /* Request LA57 after exec */
|
||||
|
||||
#define KINFO_PROC_SIZE 1088
|
||||
#define KINFO_PROC32_SIZE 768
|
||||
|
@ -169,25 +169,32 @@
|
||||
* 0xffffffff80000000 KERNBASE
|
||||
*/
|
||||
|
||||
#define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0)
|
||||
#define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \
|
||||
#define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0)
|
||||
#define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \
|
||||
NPDPEPG-1, NPDEPG-1, NPTEPG-1)
|
||||
|
||||
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
|
||||
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
|
||||
#define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0)
|
||||
#define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0)
|
||||
|
||||
#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
|
||||
#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
|
||||
#define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0)
|
||||
#define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0)
|
||||
|
||||
#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)
|
||||
#define KERNBASE KV4ADDR(KPML4I, KPDPI, 0, 0)
|
||||
|
||||
#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
|
||||
#define UPT_MIN_ADDRESS KVADDR(PML4PML4I, 0, 0, 0)
|
||||
#define UPT_MAX_ADDRESS KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
|
||||
#define UPT_MIN_ADDRESS KV4ADDR(PML4PML4I, 0, 0, 0)
|
||||
|
||||
#define VM_MAXUSER_ADDRESS UVADDR(NUPML4E, 0, 0, 0)
|
||||
#define VM_MAXUSER_ADDRESS_LA57 UVADDR(NUPML5E, 0, 0, 0, 0)
|
||||
#define VM_MAXUSER_ADDRESS_LA48 UVADDR(0, NUP4ML4E, 0, 0, 0)
|
||||
#define VM_MAXUSER_ADDRESS VM_MAXUSER_ADDRESS_LA57
|
||||
|
||||
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
|
||||
#define USRSTACK SHAREDPAGE
|
||||
#define SHAREDPAGE_LA57 (VM_MAXUSER_ADDRESS_LA57 - PAGE_SIZE)
|
||||
#define SHAREDPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - PAGE_SIZE)
|
||||
#define USRSTACK_LA57 SHAREDPAGE_LA57
|
||||
#define USRSTACK_LA48 SHAREDPAGE_LA48
|
||||
#define USRSTACK USRSTACK_LA48
|
||||
#define PS_STRINGS_LA57 (USRSTACK_LA57 - sizeof(struct ps_strings))
|
||||
#define PS_STRINGS_LA48 (USRSTACK_LA48 - sizeof(struct ps_strings))
|
||||
|
||||
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
|
||||
#define VM_MIN_ADDRESS (0)
|
||||
|
@ -739,9 +739,9 @@ struct sysentvec elf_linux_sysvec = {
|
||||
.sv_imgact_try = linux_exec_imgact_try,
|
||||
.sv_minsigstksz = LINUX_MINSIGSTKSZ,
|
||||
.sv_minuser = VM_MIN_ADDRESS,
|
||||
.sv_maxuser = VM_MAXUSER_ADDRESS,
|
||||
.sv_usrstack = USRSTACK,
|
||||
.sv_psstrings = PS_STRINGS,
|
||||
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48,
|
||||
.sv_usrstack = USRSTACK_LA48,
|
||||
.sv_psstrings = PS_STRINGS_LA48,
|
||||
.sv_stackprot = VM_PROT_ALL,
|
||||
.sv_copyout_auxargs = linux_copyout_auxargs,
|
||||
.sv_copyout_strings = linux_copyout_strings,
|
||||
@ -752,7 +752,7 @@ struct sysentvec elf_linux_sysvec = {
|
||||
.sv_set_syscall_retval = linux_set_syscall_retval,
|
||||
.sv_fetch_syscall_args = linux_fetch_syscall_args,
|
||||
.sv_syscallnames = NULL,
|
||||
.sv_shared_page_base = SHAREDPAGE,
|
||||
.sv_shared_page_base = SHAREDPAGE_LA48,
|
||||
.sv_shared_page_len = PAGE_SIZE,
|
||||
.sv_schedtail = linux_schedtail,
|
||||
.sv_thread_detach = linux_thread_detach,
|
||||
|
@ -560,7 +560,7 @@ svm_vminit(struct vm *vm, pmap_t pmap)
|
||||
panic("contigmalloc of SVM IO bitmap failed");
|
||||
|
||||
svm_sc->vm = vm;
|
||||
svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
|
||||
svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pmltop);
|
||||
|
||||
/*
|
||||
* Intercept read and write accesses to all MSRs.
|
||||
|
@ -1030,7 +1030,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
|
||||
}
|
||||
vmx->vm = vm;
|
||||
|
||||
vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4));
|
||||
vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pmltop));
|
||||
|
||||
/*
|
||||
* Clean up EPTP-tagged guest physical and combined mappings
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include <machine/clock.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#include <machine/frame.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/psl.h>
|
||||
#include <machine/trap.h>
|
||||
#include <vm/pmap.h>
|
||||
@ -131,7 +132,7 @@ dtrace_invop_uninit(void)
|
||||
void
|
||||
dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
|
||||
{
|
||||
(*func)(0, (uintptr_t) addr_PTmap);
|
||||
(*func)(0, la57 ? (uintptr_t)addr_P5Tmap : (uintptr_t)addr_P4Tmap);
|
||||
}
|
||||
|
||||
void
|
||||
|
Loading…
x
Reference in New Issue
Block a user