freebsd-dev/sys/amd64/include/proc.h
Konstantin Belousov 9ce875d9b5 amd64 pmap: LA57 AKA 5-level paging
Since LA57 was moved to the main SDM document with revision 072, it
seems that we should have a support for it, and silicons are coming.

This patch makes pmap support both LA48 and LA57 hardware.  The
selection of page table level is done at startup, kernel always
receives control from loader with 4-level paging.  It is not clear how
UEFI spec would adapt LA57, for instance it could hand out control in
LA57 mode sometimes.

To switch from LA48 to LA57 requires turning off long mode, requesting
LA57 in CR4, then re-entering long mode.  This is somewhat delicate
and done in pmap_bootstrap_la57().  AP startup in LA57 mode is much
easier, we only need to toggle a bit in CR4 and load right value in CR3.

I decided to not change kernel map for now.  Single PML5 entry is
created that points to the existing kernel_pml4 (KML4Phys) page, and a
pml5 entry to create our recursive mapping for vtopte()/vtopde().
This decision is motivated by the fact that we cannot overcommit for
KVA, so large space there is unusable until machines start providing
wider physical memory addressing.  Another reason is that I do not
want to break our fragile autotuning, so the KVA expansion is not
included into this first step.  Nice side effect is that minidumps are
compatible.

On the other hand, (very) large address space is definitely
immediately useful for some userspace applications.

For userspace, numbering of pte entries (or page table pages) is
always done for 5-level structures even if we operate in 4-level mode.
The pmap_is_la57() function is added to report the mode of the
specified pmap, this is done not to allow simultaneous 4-/5-levels
(which is not allowed by hw), but to accomodate for EPT which has
separate level control and in principle might not allow 5-leve EPT
despite x86 paging supports it. Anyway, it does not seems critical to
have 5-level EPT support now.

Tested by:	pho (LA48 hardware)
Reviewed by:	alc
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D25273
2020-08-23 20:19:04 +00:00

126 lines
3.8 KiB
C

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)proc.h 7.1 (Berkeley) 5/15/91
* $FreeBSD$
*/
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
#include <sys/queue.h>
#include <machine/pcb.h>
#include <machine/segments.h>
/*
* List of locks
* c - proc lock
* k - only accessed by curthread
* pp - pmap.c:invl_gen_mtx
*/
struct proc_ldt {
caddr_t ldt_base;
int ldt_refcnt;
};
#define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL
struct pmap_invl_gen {
u_long gen; /* (k) */
union {
LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
struct {
struct pmap_invl_gen *next;
u_char saved_pri;
};
};
} __aligned(16);
/*
* Machine-dependent part of the proc structure for AMD64.
*/
struct mdthread {
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
register_t md_spurflt_addr; /* (k) Spurious page fault address. */
struct pmap_invl_gen md_invl_gen;
register_t md_efirt_tmp; /* (k) */
int md_efirt_dis_pf; /* (k) */
struct pcb md_pcb;
vm_offset_t md_stack_base;
};
struct mdproc {
struct proc_ldt *md_ldt; /* (t) per-process ldt */
struct system_segment_descriptor md_ldt_sd;
u_int md_flags; /* (c) md process flags P_MD */
};
#define P_MD_KPTI 0x00000001 /* Enable KPTI on exec */
#define P_MD_LA48 0x00000002 /* Request LA48 after exec */
#define P_MD_LA57 0x00000004 /* Request LA57 after exec */
#define KINFO_PROC_SIZE 1088
#define KINFO_PROC32_SIZE 768
struct syscall_args {
u_int code;
struct sysent *callp;
register_t args[8];
int narg;
};
#ifdef _KERNEL
/* Get the current kernel thread stack usage. */
#define GET_STACK_USAGE(total, used) do { \
struct thread *td = curthread; \
(total) = td->td_kstack_pages * PAGE_SIZE; \
(used) = (char *)td->td_kstack + \
td->td_kstack_pages * PAGE_SIZE - \
(char *)&td; \
} while (0)
struct proc_ldt *user_ldt_alloc(struct proc *, int);
void user_ldt_free(struct thread *);
struct sysarch_args;
int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space);
int amd64_set_ldt_data(struct thread *td, int start, int num,
struct user_segment_descriptor *descs);
extern struct mtx dt_lock;
extern int max_ldt_segment;
#define NARGREGS 6
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */