MFC r281495:

Add config option PAE_TABLES for the i386 kernel.  It switches pmap to
use the PAE format for the page tables, but does not incur the other
consequences of the full PAE config.  In particular, vm_paddr_t and
bus_addr_t are left 32bit, and the maximum supported memory is still
limited to 4GB.

The option makes nx permissions available for memory mappings on the
i386 kernel, while keeping the usual i386 KBI and avoiding the kernel
data sizing problems typical for the PAE config.
This commit is contained in:
kib 2015-04-27 08:02:12 +00:00
parent bc0b39657e
commit facaa68fb9
16 changed files with 60 additions and 52 deletions

View File

@ -37,6 +37,11 @@ KVA_PAGES opt_global.h
# Physical address extensions and support for >4G ram. As above.
PAE opt_global.h
# Use PAE page tables, but limit memory support to 4GB.
# This keeps the i386 non-PAE KBI, in particular, drivers see
# 32bit vm_paddr_t.
PAE_TABLES opt_global.h
TIMER_FREQ opt_clock.h
CPU_ATHLON_SSE_HACK opt_cpu.h

View File

@ -389,7 +389,7 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.limit = 0xffff;
ptd = (pd_entry_t *)rcr3();
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if (ptd == IdlePDPT)
#else
if (ptd == IdlePTD)

View File

@ -784,7 +784,7 @@ initializecpu(void)
init_transmeta();
break;
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((amd_feature & AMDID_NX) != 0) {
uint64_t msr;

View File

@ -99,7 +99,7 @@ physfree: .long 0 /* phys addr of next free page */
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
.globl IdlePDPT
IdlePDPT: .long 0 /* phys addr of kernel PDPT */
#endif
@ -281,7 +281,7 @@ NON_GPROF_ENTRY(btext)
1:
/* Now enable paging */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %eax
@ -722,7 +722,7 @@ no_kernend:
movl %esi,R(KPTmap)
/* Allocate Page Table Directory */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
/* XXX only need 32 bytes (easier for now) */
ALLOCPAGES(1)
movl %esi,R(IdlePDPT)
@ -788,7 +788,7 @@ no_kernend:
fillkptphys($PG_RW)
/* Map page directory. */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl $1, %ecx
fillkptphys($PG_RW)
@ -890,7 +890,7 @@ done_pde:
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePTD), %eax
xorl %ebx, %ebx
movl $NPGPTD, %ecx

View File

@ -3080,7 +3080,7 @@ init386(first)
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
@ -3133,7 +3133,7 @@ init386(first)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
@ -3385,7 +3385,7 @@ init386(first)
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
@ -3457,7 +3457,7 @@ init386(first)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;

View File

@ -265,7 +265,7 @@ minidumpsys(struct dumperinfo *di)
mdhdr.bitmapsize = vm_page_dump_size;
mdhdr.ptesize = ptesize;
mdhdr.kernbase = KERNBASE;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
mdhdr.paemode = 1;
#endif

View File

@ -99,7 +99,7 @@ NON_GPROF_ENTRY(MPentry)
movl %eax,%cr4
/* Now enable paging mode */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %eax

View File

@ -214,7 +214,7 @@ vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR;
extern u_int32_t KERNend;
extern u_int32_t KPTphys;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pt_entry_t pg_nx;
static uma_zone_t pdptzone;
#endif
@ -339,7 +339,7 @@ static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif
static void pmap_set_pg(void);
@ -399,7 +399,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
*/
PMAP_LOCK_INIT(kernel_pmap);
kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
@ -656,7 +656,7 @@ pmap_page_init(vm_page_t m)
m->md.pat_mode = PAT_WRITE_BACK;
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
@ -669,7 +669,7 @@ pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
#endif
/*
* ABuse the pte nodes for unmapped kva to thread a kva freelist through.
* Abuse the pte nodes for unmapped kva to thread a kva freelist through.
* Requirements:
* - Must deal with pages in order to ensure that none of the PG_* bits
* are ever set, PG_V in particular.
@ -808,7 +808,7 @@ pmap_init(void)
if (pv_chunkbase == NULL)
panic("pmap_init: not enough kvm for pv chunks");
pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
UMA_ZONE_VM | UMA_ZONE_NOFREE);
@ -1743,7 +1743,7 @@ pmap_pinit0(pmap_t pmap)
* not need to be inserted into that list.
*/
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
pmap->pm_root.rt_root = 0;
@ -1772,7 +1772,7 @@ pmap_pinit(pmap_t pmap)
pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
if (pmap->pm_pdir == NULL)
return (0);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
KASSERT(((vm_offset_t)pmap->pm_pdpt &
((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
@ -1814,7 +1814,7 @@ pmap_pinit(pmap_t pmap)
for (i = 0; i < NPGPTD; i++) {
pa = VM_PAGE_TO_PHYS(ptdpg[i]);
pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pmap->pm_pdpt[i] = pa | PG_V;
#endif
}
@ -1973,7 +1973,7 @@ pmap_lazyfix(pmap_t pmap)
lsb--;
CPU_SETOF(lsb, &mask);
mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
lazyptd = vtophys(pmap->pm_pdpt);
#else
lazyptd = vtophys(pmap->pm_pdir);
@ -2057,7 +2057,7 @@ pmap_release(pmap_t pmap)
for (i = 0; i < NPGPTD; i++) {
m = ptdpg[i];
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
("pmap_release: got wrong ptd page"));
#endif
@ -3149,7 +3149,7 @@ pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot)
}
if ((prot & VM_PROT_WRITE) == 0)
newpde &= ~(PG_RW | PG_M);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpde |= pg_nx;
#endif
@ -3182,7 +3182,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
return;
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
(VM_PROT_WRITE|VM_PROT_EXECUTE))
return;
@ -3285,13 +3285,13 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
}
pbits &= ~(PG_RW | PG_M);
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
pbits |= pg_nx;
#endif
if (pbits != obits) {
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if (!atomic_cmpset_64(pte, obits, pbits))
goto retry;
#else
@ -3605,7 +3605,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((newpte & PG_MANAGED) != 0)
vm_page_aflag_set(m, PGA_WRITEABLE);
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
#endif
@ -3632,7 +3632,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
vm_page_aflag_set(om, PGA_REFERENCED);
if (opa != VM_PAGE_TO_PHYS(m))
invlva = TRUE;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((origpte & PG_NX) == 0 &&
(newpte & PG_NX) != 0)
invlva = TRUE;
@ -3703,7 +3703,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
return (FALSE);
}
}
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
newpde |= pg_nx;
#endif
@ -3880,7 +3880,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
pmap->pm_stats.resident_count++;
pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
if ((prot & VM_PROT_EXECUTE) == 0)
pa |= pg_nx;
#endif
@ -5460,7 +5460,7 @@ pmap_activate(struct thread *td)
CPU_CLR(cpuid, &oldpmap->pm_active);
CPU_SET(cpuid, &pmap->pm_active);
#endif
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
cr3 = vtophys(pmap->pm_pdpt);
#else
cr3 = vtophys(pmap->pm_pdir);

View File

@ -174,7 +174,7 @@ ENTRY(cpu_switch)
/* switch address space */
movl PCB_CR3(%edx),%eax
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
cmpl %eax,IdlePDPT /* Kernel address space? */
#else
cmpl %eax,IdlePTD /* Kernel address space? */

View File

@ -884,7 +884,7 @@ trap_pfault(frame, usermode, eva)
*/
if (frame->tf_err & PGEX_W)
ftype = VM_PROT_WRITE;
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
ftype = VM_PROT_EXECUTE;
#endif

View File

@ -122,7 +122,7 @@ ENTRY(vm86_bioscall)
movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */
movl %eax,0(%ebx) /* ... install as PTD entry 0 */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
movl IdlePDPT,%ecx
#endif
movl %ecx,%cr3 /* new page tables */

View File

@ -294,7 +294,7 @@ cpu_fork(td1, p2, td2, flags)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);

View File

@ -90,7 +90,7 @@
#define PAGE_MASK (PAGE_SIZE-1)
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
#define NPGPTD 4
#define PDRSHIFT 21 /* LOG2(NBPDR) */
#define NPGPTD_SHIFT 9

View File

@ -63,7 +63,7 @@
#define PG_AVAIL2 0x400 /* < programmers use */
#define PG_AVAIL3 0x800 /* \ */
#define PG_PDE_PAT 0x1000 /* PAT PAT index */
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
#define PG_NX (1ull<<63) /* No-execute */
#endif
@ -71,7 +71,7 @@
/* Our various interpretations of the above */
#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
#define PG_MANAGED PG_AVAIL2
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
#define PG_FRAME (0x000ffffffffff000ull)
#define PG_PS_FRAME (0x000fffffffe00000ull)
#else
@ -110,7 +110,7 @@
* is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE.
*/
#ifndef KVA_PAGES
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
#define KVA_PAGES 512
#else
#define KVA_PAGES 256
@ -128,11 +128,14 @@
* be calculated as follows:
* max_phys / PAGE_SIZE * sizeof(struct vm_page) / NBPDR
* PAE: max_phys 16G, sizeof(vm_page) 76, NBPDR 2M, 152 page table pages.
* PAE_TABLES: max_phys 4G, sizeof(vm_page) 68, NBPDR 2M, 36 page table pages.
* Non-PAE: max_phys 4G, sizeof(vm_page) 68, NBPDR 4M, 18 page table pages.
*/
#ifndef NKPT
#ifdef PAE
#if defined(PAE)
#define NKPT 240
#elif defined(PAE_TABLES)
#define NKPT 60
#else
#define NKPT 30
#endif
@ -166,7 +169,7 @@
#include <vm/_vm_radix.h>
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
typedef uint64_t pdpt_entry_t;
typedef uint64_t pd_entry_t;
@ -193,7 +196,7 @@ extern pt_entry_t PTmap[];
extern pd_entry_t PTD[];
extern pd_entry_t PTDpde[];
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
extern pdpt_entry_t *IdlePDPT;
#endif
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
@ -331,7 +334,7 @@ pmap_kextract(vm_offset_t va)
#define PT_UPDATES_FLUSH()
#endif
#if defined(PAE) && !defined(XEN)
#if (defined(PAE) || defined(PAE_TABLES)) && !defined(XEN)
#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte)
@ -340,7 +343,7 @@ pmap_kextract(vm_offset_t va)
extern pt_entry_t pg_nx;
#elif !defined(PAE) && !defined(XEN)
#elif !defined(PAE) && !defined(PAE_TABLES) && !defined(XEN)
#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new)
#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte)
@ -375,8 +378,8 @@ struct pmap {
cpuset_t pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
#ifdef PAE
pdpt_entry_t *pm_pdpt; /* KVA of page director pointer
#if defined(PAE) || defined(PAE_TABLES)
pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer
table */
#endif
struct vm_radix pm_root; /* spare page table pages */

View File

@ -120,11 +120,11 @@
#endif
/*
* Level 0 reservations consist of 512 pages under PAE and 1024 pages
* otherwise.
* Level 0 reservations consist of 512 pages when PAE pagetables are
* used, and 1024 pages otherwise.
*/
#ifndef VM_LEVEL_0_ORDER
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
#define VM_LEVEL_0_ORDER 9
#else
#define VM_LEVEL_0_ORDER 10

View File

@ -368,7 +368,7 @@ acpi_install_wakeup_handler(struct acpi_softc *sc)
/* Save pointers to some global data. */
WAKECODE_FIXUP(wakeup_ret, void *, resumectx);
#ifndef __amd64__
#ifdef PAE
#if defined(PAE) || defined(PAE_TABLES)
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt));
#else
WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir));