Major pmap rework to take advantage of the larger address space on amd64
systems.  Of note:
- Implement a direct mapped region using 2MB pages.  This eliminates the
  need for temporary mappings when getting ptes, supports up to 512GB of
  physical memory for now, and should be enough for a while.  (A sketch
  of the idea follows this list.)
- Implement a 4-tier page table system.  Most of the infrastructure is
  there for 128TB of userland virtual address space, but only 512GB is
  presently enabled due to a mystery bug somewhere.  The design of this
  was heavily inspired by the alpha pmap.c.  (See the address
  decomposition sketch after these lists.)
- The kernel is moved into the negative address space(!).
- The kernel has 2GB of KVM available.
- Provide a uma memory allocator to use the direct map region to take
  advantage of the 2MB TLBs.
- Fix some assumptions in the bus_space macros about the ability to fit
  virtual addresses in an 'int'.
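
The direct map turns "give me the pte at physical address pa" into plain
arithmetic instead of a temporary-mapping dance, and it is also what the
UMA small-page allocator hands out.  A minimal sketch of the idea, using
the PHYS_TO_DMAP() macro added to vmparam.h below (the helper name
pte_window() is illustrative, not part of this commit):

	/*
	 * Any physical address below 512GB is permanently visible at a
	 * fixed virtual offset, backed by 2MB pages.
	 */
	static __inline pt_entry_t *
	pte_window(vm_paddr_t pa)		/* illustrative helper */
	{
		/* PHYS_TO_DMAP(x) == ((x) | DMAP_MIN_ADDRESS) */
		return ((pt_entry_t *)PHYS_TO_DMAP(pa));
	}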

Notable missing things:
- pmap_growkernel() should be able to grow to 512GB of KVM by expanding
  downwards below kernbase.  The kernel must be at the top 2GB of the
  negative address space because of gcc code generation strategies.
- Need to fix the >512GB user vm code.
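
For reference, the address decomposition behind the 4-tier scheme: each
level is indexed by 9 bits (512 8-byte entries per 4KB page), so four
levels plus the 12-bit page offset cover the 48 implemented bits.  A
worked sketch using the PML4SHIFT/PDPSHIFT/PDRSHIFT constants from
param.h below (the *_IDX macro names are illustrative):

	#define	PML4_IDX(va)	(((va) >> PML4SHIFT) & 0x1ff)	/* bits 47..39 */
	#define	PDP_IDX(va)	(((va) >> PDPSHIFT) & 0x1ff)	/* bits 38..30 */
	#define	PD_IDX(va)	(((va) >> PDRSHIFT) & 0x1ff)	/* bits 29..21 */
	#define	PT_IDX(va)	(((va) >> PAGE_SHIFT) & 0x1ff)	/* bits 20..12 */

	/* 512 PML4 slots * 512GB each = 256TB; the user half is 128TB. */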

Approved by:	re (blanket)
Peter Wemm 2003-05-23 05:04:54 +00:00
parent 74f2cc2c9c
commit 3c9a3c9ca3
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=115251
13 changed files with 653 additions and 603 deletions

View File

@@ -99,8 +99,11 @@ ASSYM(KSTACK_PAGES, KSTACK_PAGES);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
ASSYM(NPDEPG, NPDEPG);
ASSYM(NPDEPTD, NPDEPTD);
ASSYM(NPGPTD, NPGPTD);
ASSYM(addr_PTmap, addr_PTmap);
ASSYM(addr_PDmap, addr_PDmap);
ASSYM(addr_PDPmap, addr_PDPmap);
ASSYM(addr_PML4map, addr_PML4map);
ASSYM(addr_PML4pml4e, addr_PML4pml4e);
ASSYM(PDESIZE, sizeof(pd_entry_t));
ASSYM(PTESIZE, sizeof(pt_entry_t));
ASSYM(PTESHIFT, PTESHIFT);
@@ -109,9 +112,14 @@ ASSYM(PAGE_MASK, PAGE_MASK);
ASSYM(PDRSHIFT, PDRSHIFT);
ASSYM(PDPSHIFT, PDPSHIFT);
ASSYM(PML4SHIFT, PML4SHIFT);
ASSYM(val_KPDPI, KPDPI);
ASSYM(val_KPML4I, KPML4I);
ASSYM(val_PML4PML4I, PML4PML4I);
ASSYM(USRSTACK, USRSTACK);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(KERNBASE, KERNBASE);
ASSYM(DMAP_MIN_ADDRESS, DMAP_MIN_ADDRESS);
ASSYM(DMAP_MAX_ADDRESS, DMAP_MAX_ADDRESS);
ASSYM(MCLBYTES, MCLBYTES);
ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15));

View File

@@ -36,8 +36,15 @@
/*
* Compiled KERNBASE location
*/
.globl kernbase
.globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
.set kernbase,KERNBASE
.set loc_PTmap,addr_PTmap
.set loc_PDmap,addr_PDmap
.set loc_PDPmap,addr_PDPmap
.set loc_PML4map,addr_PML4map
.set loc_PML4pml4e,addr_PML4pml4e
.set dmapbase,DMAP_MIN_ADDRESS
.set dmapend,DMAP_MAX_ADDRESS
.text
/**********************************************************************

View File

@@ -133,11 +133,6 @@ u_long atdevbase;
u_int64_t modulep; /* phys addr of metadata table */
u_int64_t physfree; /* first free page after kernel */
u_int64_t IdlePTD; /* phys addr of kernel PTD */
u_int64_t IdlePDP; /* phys addr of kernel level 3 */
u_int64_t IdlePML4; /* phys addr of kernel level 4 */
struct user *proc0uarea; /* address of proc 0 uarea space */
vm_offset_t proc0kstack; /* address of proc 0 kstack space */
int cold = 1;
@@ -945,7 +940,7 @@ getmemsize(caddr_t kmdp, u_int64_t first)
physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
/* call pmap initialization to make new kernel address space */
pmap_bootstrap(first, 0);
pmap_bootstrap(&first);
/*
* Size up each available chunk of physical memory.
@@ -1086,69 +1081,6 @@ allocpages(int n)
return (ret);
}
static void
create_pagetables(void)
{
u_int64_t p0kpa;
u_int64_t p0upa;
u_int64_t KPTphys;
int i;
/* Allocate pages */
KPTphys = allocpages(NKPT);
IdlePML4 = allocpages(NKPML4E);
IdlePDP = allocpages(NKPDPE);
IdlePTD = allocpages(NPGPTD);
p0upa = allocpages(UAREA_PAGES);
p0kpa = allocpages(KSTACK_PAGES);
proc0uarea = (struct user *)(p0upa + KERNBASE);
proc0kstack = p0kpa + KERNBASE;
/* Fill in the underlying page table pages */
/* Read-only from zero to physfree */
/* XXX not fully used, underneath 2M pages */
for (i = 0; (i << PAGE_SHIFT) < physfree; i++) {
((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
}
/* Now map the page tables at their location within PTmap */
for (i = 0; i < NKPT; i++) {
((pd_entry_t *)IdlePTD)[i + KPTDI] = KPTphys + (i << PAGE_SHIFT);
((pd_entry_t *)IdlePTD)[i + KPTDI] |= PG_RW | PG_V;
}
/* Map from zero to end of allocations under 2M pages */
/* This replaces some of the PTD entries above */
for (i = 0; (i << PDRSHIFT) < physfree; i++) {
((pd_entry_t *)IdlePTD)[i] = i << PDRSHIFT;
((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V | PG_PS;
}
/* Now map the page tables at their location within PTmap */
for (i = 0; i < NKPT; i++) {
((pd_entry_t *)IdlePTD)[i] = KPTphys + (i << PAGE_SHIFT);
((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V;
}
/* Now map the PTD at the top of the PTmap (ie: PTD[]) */
for (i = 0; i < NPGPTD; i++) {
((pd_entry_t *)IdlePTD)[i + PTDPTDI] = IdlePTD + (i << PAGE_SHIFT);
((pd_entry_t *)IdlePTD)[i + PTDPTDI] |= PG_RW | PG_V;
}
/* And connect up the PTD to the PDP */
for (i = 0; i < NPGPTD; i++) {
((pdp_entry_t *)IdlePDP)[i] = IdlePTD + (i << PAGE_SHIFT);
((pdp_entry_t *)IdlePDP)[i] |= PG_RW | PG_V | PG_U;
}
/* And connect up the PDP to the PML4 */
((pdp_entry_t *)IdlePML4)[0] = IdlePDP;
((pdp_entry_t *)IdlePML4)[0] |= PG_RW | PG_V | PG_U;
}
void
hammer_time(void)
{
@@ -1157,18 +1089,14 @@ hammer_time(void)
struct region_descriptor r_gdt, r_idt;
struct pcpu *pc;
u_int64_t msr;
char *env;
/* Turn on PTE NX (no execute) bit */
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
create_pagetables();
/* XXX do %cr0 as well */
load_cr4(rcr4() | CR4_PGE | CR4_PSE);
load_cr3(IdlePML4);
proc0.p_uarea = proc0uarea;
thread0.td_kstack = proc0kstack;
proc0.p_uarea = (struct user *)(allocpages(UAREA_PAGES) + KERNBASE);
thread0.td_kstack = allocpages(KSTACK_PAGES) + KERNBASE;
thread0.td_pcb = (struct pcb *)
(thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1310,8 +1238,12 @@ hammer_time(void)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
thread0.td_pcb->pcb_cr3 = IdlePML4;
thread0.td_pcb->pcb_cr3 = KPML4phys;
thread0.td_frame = &proc0_tf;
env = getenv("kernelname");
if (env != NULL)
strlcpy(kernelname, env, sizeof(kernelname));
}
void

View File

@@ -63,6 +63,7 @@
#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -177,7 +178,7 @@ mmrw(dev_t dev, struct uio *uio, int flags)
addr = trunc_page(uio->uio_offset);
eaddr = round_page(uio->uio_offset + c);
if (addr < (vm_offset_t)VADDR(0, 0, PTDPTDI, 0))
if (addr < (vm_offset_t)KERNBASE)
return (EFAULT);
for (; addr < eaddr; addr += PAGE_SIZE)
if (pmap_extract(kernel_pmap, addr) == 0)

View File

@@ -355,8 +355,8 @@ nexus_activate_resource(device_t bus, device_t child, int type, int rid,
*/
vaddr = (caddr_t)(uintptr_t)(KERNBASE + rman_get_start(r));
} else {
u_int32_t paddr;
u_int32_t psize;
u_int64_t paddr;
u_int64_t psize;
u_int32_t poffs;
paddr = rman_get_start(r);

File diff suppressed because it is too large.
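
The heart of the change is the pmap.c rewrite suppressed above.  For
orientation only: the create_pagetables() logic deleted from machdep.c
earlier reappears there in 4-level form, chaining PT -> PD -> PDP ->
PML4, installing the direct map in the DMPML4I slot and the recursive
mapping in the PML4PML4I slot.  A minimal sketch under those assumptions
(allocpages() as in machdep.c; early boot still runs with low physical
memory identity-mapped, and the KPDPphys/DMPDPphys names are
illustrative):

	u_int64_t KPML4phys, KPDPphys, DMPDPphys;

	KPML4phys = allocpages(NKPML4E);	/* replaces IdlePML4 */
	KPDPphys = allocpages(NKPDPE);
	DMPDPphys = allocpages(NDMPML4E);

	/* kernel mappings hang off the top PML4 slot */
	((pml4_entry_t *)KPML4phys)[KPML4I] = KPDPphys | PG_RW | PG_V;
	/* the direct map gets a PML4 slot of its own */
	((pml4_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys | PG_RW | PG_V;
	/* the PML4 maps itself, which is what makes PTmap et al. work */
	((pml4_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys | PG_RW | PG_V;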

View File

@@ -212,7 +212,7 @@ trap(frame)
* kernel can print out a useful trap message and even get
* to the debugger.
*/
if (td->td_critnest == 0)
if (td->td_critnest != 0)
trap_fatal(&frame, frame.tf_addr);
}

View File

@@ -264,7 +264,7 @@ cpu_reset_real()
printf("Keyboard reset did not work, attempting CPU shutdown\n");
DELAY(1000000); /* wait 1 sec for printf to complete */
/* force a shutdown by unmapping entire address space ! */
bzero((caddr_t)PTD, NBPTD);
bzero((caddr_t)PML4map, PAGE_SIZE);
/* "good night, sweet prince .... <THUNK!>" */
invltlb();

View File

@@ -401,7 +401,7 @@ bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -443,7 +443,7 @@ bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -485,7 +485,7 @@ bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -741,7 +741,7 @@ bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -783,7 +783,7 @@ bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -825,7 +825,7 @@ bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
int _port_ = bsh + offset;
bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
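
The reason the bus_space _port_ temporaries had to stop being 'int':
with the kernel in the negative address space, bsh + offset is a 64-bit
value whose upper bits an int silently drops.  A contrived illustration
(not part of the diff; the address is just an example of a
negative-space kernel VA):

	bus_space_handle_t bsh = 0xffffffff80100000ul;
	int bad = bsh + 0x10;			/* truncated to 32 bits */
	bus_space_handle_t good = bsh + 0x10;	/* all 64 bits kept */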

View File

@@ -89,34 +89,32 @@
#define ALIGN(p) _ALIGN(p)
#define ALIGNED_POINTER(p,t) _ALIGNED_POINTER((p),(t))
/* Size of the level 1 page table units */
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
#define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */
#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
#define PAGE_MASK (PAGE_SIZE-1)
/* Size of the level 2 page directory units */
#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NPDEPGSHIFT 9 /* LOG2(NPDEPG) */
#define PDRSHIFT 21 /* LOG2(NBPDR) */
#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
#define PDRMASK (NBPDR-1)
/* Size of the level 3 page directory pointer table units */
#define NPDPEPG (PAGE_SIZE/(sizeof (pdp_entry_t)))
#define NPDPEPGSHIFT 9 /* LOG2(NPDPEPG) */
#define PDPSHIFT 30 /* LOG2(NBPDP) */
#define NBPDP (1<<PDPSHIFT) /* bytes/page dir ptr table */
#define PDPMASK (NBPDP-1)
/* Size of the level 4 page-map level-4 table units */
#define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t)))
#define NPML4EPGSHIFT 9 /* LOG2(NPML4EPG) */
#define PML4SHIFT 39 /* LOG2(NBPML4T) */
#define NBPML4T (1ul<<PML4SHIFT)/* bytes/page map lev4 table */
#define PML4MASK (NBPML4T-1)
#define NKPML4E 1 /* addressable number of page tables/pde's */
#define NKPDPE 1 /* addressable number of page tables/pde's */
#define NPGPTD 4
#define NBPTD (NPGPTD<<PAGE_SHIFT)
#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
#define IOPAGES 2 /* pages of i/o permission bitmap */
#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */
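
A quick sanity check of the constants above (worked arithmetic, not code
from the commit):

	/*
	 * Every entry type is 8 bytes, so each level holds
	 * PAGE_SIZE / 8 = 512 = 2^9 entries:
	 *
	 *	PAGE_SHIFT	12		4KB pages
	 *	PDRSHIFT	12 + 9 = 21	2MB per PDE (direct map unit)
	 *	PDPSHIFT	21 + 9 = 30	1GB per PDPE
	 *	PML4SHIFT	30 + 9 = 39	512GB per PML4E
	 *
	 * 512 PML4 slots * 512GB = 256TB of virtual address space; the
	 * user half is the 128TB figure from the commit message, and a
	 * single user PML4 slot (NUPML4E, pmap.h below) is the 512GB
	 * currently enabled.
	 */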

View File

@@ -82,13 +82,6 @@
#define PGEX_W 0x02 /* during a Write cycle */
#define PGEX_U 0x04 /* access from User mode (UPL) */
/*
* Size of Kernel address space. This is the number of level 4 (top)
* entries. We use half of them for the kernel due to the 48 bit
* virtual address sign extension.
*/
#define KVA_PAGES 1536
/*
* Pte related macros. This is complicated by having to deal with
* the sign extension of the 48th bit.
@@ -105,15 +98,26 @@
#ifndef NKPT
#define NKPT 120 /* initial number of kernel page tables */
#endif
#ifndef NKPDE
#define NKPDE (KVA_PAGES) /* number of page tables/pde's */
#endif
#define NKPML4E 1 /* number of kernel PML4 slots */
#define NKPDPE 1 /* number of kernel PDP slots */
#define NKPDE (NKPDPE*NPDEPG) /* number of kernel PD slots */
#define NUPML4E 1 /* number of userland PML4 pages */
#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
#define NDMPML4E 1 /* number of dmap PML4 slots */
/*
* The *PTDI values control the layout of virtual memory
* The *PDI values control the layout of virtual memory
*/
#define KPTDI (NPDEPTD-NKPDE) /* start of kernel virtual pde's */
#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
#define KPML4I (NPML4EPG-1)
#define DMPML4I (KPML4I-1)
#define KPDPI (NPDPEPG-1)
/*
* XXX doesn't really belong here I guess...
@@ -145,13 +149,18 @@ typedef u_int64_t pml4_entry_t;
* in the page tables and the evil overlapping.
*/
#ifdef _KERNEL
#define PTmap ((pt_entry_t *)(VADDR(0, 0, PTDPTDI, 0)))
#define PTD ((pd_entry_t *)(VADDR(0, 0, PTDPTDI, PTDPTDI)))
#define PTDpde ((pd_entry_t *)(VADDR(0, 0, PTDPTDI, PTDPTDI) + (PTDPTDI * sizeof(pd_entry_t))))
#define addr_PTmap (VADDR(PML4PML4I, 0, 0, 0))
#define addr_PDmap (VADDR(PML4PML4I, PML4PML4I, 0, 0))
#define addr_PDPmap (VADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
#define addr_PML4map (VADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
#define PTmap ((pt_entry_t *)(addr_PTmap))
#define PDmap ((pd_entry_t *)(addr_PDmap))
#define PDPmap ((pd_entry_t *)(addr_PDPmap))
#define PML4map ((pd_entry_t *)(addr_PML4map))
#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
extern u_int64_t IdlePML4; /* physical address of "Idle" state directory */
extern u_int64_t IdlePDP; /* physical address of "Idle" state directory */
extern u_int64_t IdlePTD; /* physical address of "Idle" state directory */
extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
#endif
#ifdef _KERNEL
@@ -161,28 +170,8 @@ extern u_int64_t IdlePTD; /* physical address of "Idle" state directory */
* Note: these work recursively, thus vtopte of a pte will give
* the corresponding pde that in turn maps it.
*/
#define vtopte(va) (PTmap + amd64_btop(va))
/*
* Routine: pmap_kextract
* Function:
* Extract the physical page address associated
* kernel virtual address.
*/
static __inline vm_paddr_t
pmap_kextract(vm_offset_t va)
{
vm_paddr_t pa;
pa = PTD[va >> PDRSHIFT];
if (pa & PG_PS) {
pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1));
} else {
pa = *vtopte(va);
pa = (pa & PG_FRAME) | (va & PAGE_MASK);
}
return pa;
}
pt_entry_t *vtopte(vm_offset_t);
vm_paddr_t pmap_kextract(vm_offset_t);
#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
@@ -225,14 +214,12 @@ struct md_page {
};
struct pmap {
pd_entry_t *pm_pdir; /* KVA of page directory */
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
vm_object_t pm_pteobj; /* Container for pte's */
TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
u_long pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
pdp_entry_t *pm_pdp; /* KVA of level 3 page table */
pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
};
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
@@ -278,7 +265,7 @@ extern char *ptvmmap; /* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
void pmap_bootstrap(vm_paddr_t, vm_paddr_t);
void pmap_bootstrap(vm_paddr_t *);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void pmap_kremove(vm_offset_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
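
With the recursive PML4PML4I slot, all ptes appear as one flat array at
addr_PTmap, which is why vtopte() can become a simple function instead
of the old macro.  A sketch of the technique (the real body is in the
suppressed pmap.c diff; the mask keeps the four 9-bit table indices):

	pt_entry_t *
	vtopte(vm_offset_t va)
	{
		u_int64_t mask = (1ul << (4 * NPTEPGSHIFT)) - 1;

		return (PTmap + ((va >> PAGE_SHIFT) & mask));
	}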

View File

@@ -41,7 +41,7 @@
#ifndef _MACHINE_VMPARAM_H_
#define _MACHINE_VMPARAM_H_ 1
#define _MACHINE_VMPARAM_H_ 1
/*
* Machine dependent constants for AMD64.
@@ -64,7 +64,7 @@
#define MAXSSIZ (64UL*1024*1024) /* max stack size */
#endif
#ifndef SGROWSIZ
#define SGROWSIZ (128UL*1024) /* amount to grow stack */
#define SGROWSIZ (128UL*1024) /* amount to grow stack */
#endif
/*
@@ -78,6 +78,12 @@
*/
#define MAXSLP 20
/*
* We provide a machine specific single page allocator through the use
* of the direct mapped segment. This uses 2MB pages for reduced
* TLB pressure.
*/
#define UMA_MD_SMALL_ALLOC
/*
* Virtual addresses of things. Derived from the page directory and
@@ -86,24 +92,30 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
#define VM_MAX_KERNEL_ADDRESS VADDR(0, 0, KPTDI+NKPDE-1, NPTEPG-1)
#define VM_MIN_KERNEL_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI)
#define VM_MAX_KERNEL_ADDRESS VADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1)
#define VM_MIN_KERNEL_ADDRESS VADDR(KPML4I, KPDPI, 0, 0)
#define KERNBASE VADDR(0, 0, KPTDI, 0)
#define DMAP_MIN_ADDRESS VADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS VADDR(DMPML4I+1, 0, 0, 0)
#define UPT_MAX_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(0, 0, PTDPTDI, 0)
#define KERNBASE VADDR(KPML4I, KPDPI, 0, 0)
#define VM_MAXUSER_ADDRESS UPT_MIN_ADDRESS
#define UPT_MAX_ADDRESS VADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
#define UPT_MIN_ADDRESS VADDR(PML4PML4I, 0, 0, 0)
#define USRSTACK VM_MAXUSER_ADDRESS
#define VM_MAXUSER_ADDRESS VADDR(NUPML4E, 0, 0, 0)
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
#define VM_MIN_ADDRESS (0)
#define USRSTACK VM_MAXUSER_ADDRESS
#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
#define VM_MIN_ADDRESS (0)
#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
#define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS)
/* virtual sizes (bytes) for various kernel submaps */
#ifndef VM_KMEM_SIZE
#define VM_KMEM_SIZE (12 * 1024 * 1024)
#define VM_KMEM_SIZE (12 * 1024 * 1024)
#endif
/*

View File

@@ -57,7 +57,7 @@ CFLAGS+= -mcmodel=medlow -msoft-float
# once pmap is ready. Be excessively careful to not generate FPU code.
#
.if ${MACHINE_ARCH} == "amd64"
CFLAGS+= -mcmodel=medium -mno-red-zone \
CFLAGS+= -mcmodel=kernel -mno-red-zone \
-mfpmath=387 -mno-sse -mno-sse2 -mno-mmx -mno-3dnow -msoft-float
.endif
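
The -mcmodel=kernel switch is tied to the "kernel must be at the top
2GB" note in the commit message: gcc's kernel code model assumes all
symbols live in the negative 2GB, so it can materialize addresses from
sign-extended 32-bit immediates (R_X86_64_32S relocations), which is
also why pmap_growkernel() will have to grow downwards from kernbase
rather than moving the kernel.  Illustrative only:

	/*
	 * Under -mcmodel=kernel this needs only a sign-extended imm32:
	 *
	 *	movq	$kernbase, %rax		# R_X86_64_32S
	 *
	 * which resolves only if the address sign-extends from 32 bits,
	 * i.e. lies in [-2GB, 0).
	 */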