o Implement shared pagetables and switch from a 4-level to a 3-level
  page memory system.

The RISC-V ISA has only a single page table base register for translating
both kernel and user addresses. Before this commit we used an extra (4th)
level of pagetables to switch between the kernel and user pagetables, but
it turned out that the FPGA hardware has a 3-level page system hardcoded.
It also became clear that the bitfile synthesized for the 4-level system
is untested and broken, so we cannot use the extra level for switching.

We now share level 1 of the pagetables between kernel and user VAs. This
requires keeping track of all the user pmaps created: once we add an L1
page to the kernel pmap, we must add it to every user pmap as well.
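
The diff below implements this with a global pmap_list and a
pmap_distribute_l1() helper. A rough, userspace-runnable sketch of the
scheme (the stub struct pmap is hypothetical; locking, PTE encoding and
TLB maintenance are simplified away):

#include <stdint.h>
#include <string.h>
#include <sys/queue.h>			/* SLIST_* macros, as in the diff */

#define	Ln_ENTRIES	512		/* 512 eight-byte entries per table */
typedef uint64_t pd_entry_t;

struct pmap {
	pd_entry_t pm_l1[Ln_ENTRIES];	/* root (L1) page table */
	SLIST_ENTRY(pmap) pm_link;
};

static SLIST_HEAD(, pmap) pmap_list = SLIST_HEAD_INITIALIZER(pmap_list);
static struct pmap kernel_pmap_store;
static struct pmap *kernel_pmap = &kernel_pmap_store;

/* New user pmap: start from a copy of the kernel L1, then register it. */
static void
pmap_pinit(struct pmap *pmap)
{

	memcpy(pmap->pm_l1, kernel_pmap->pm_l1, sizeof(pmap->pm_l1));
	SLIST_INSERT_HEAD(&pmap_list, pmap, pm_link);
}

/* A kernel L1 slot changed: mirror it into every registered user pmap. */
static void
pmap_distribute_l1(struct pmap *pmap, unsigned int l1index, pd_entry_t entry)
{
	struct pmap *p;

	if (pmap != kernel_pmap)
		return;
	SLIST_FOREACH(p, &pmap_list, pm_link)
		p->pm_l1[l1index] = entry;	/* entry == 0 clears the slot */
}

int
main(void)
{
	struct pmap user_pmap;

	pmap_pinit(&user_pmap);
	/* Pretend pmap_growkernel() installed a new kernel L1 entry. */
	kernel_pmap->pm_l1[0x1ff] = 0x1;
	pmap_distribute_l1(kernel_pmap, 0x1ff, kernel_pmap->pm_l1[0x1ff]);
	return (user_pmap.pm_l1[0x1ff] == 0x1 ? 0 : 1);
}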

o Change the VM layout, as the topmost bit must be 1 for kernel
  addresses and 0 for user addresses in the selected page system
  (see the worked example after this list).
o Implement pmap_kenter_device().
o Create the l3 tables for the early devmap.
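
The new vmparam.h values follow from Sv39 arithmetic: an address's L1
slot is bits 38:30 of the VA, so user addresses fall in slots
0x000-0x0ff and kernel addresses in slots 0x100-0x1ff, and the two
halves can never collide in the shared L1 table. A hypothetical
standalone check, using constants from this diff:

#include <stdio.h>
#include <stdint.h>

#define	L1_SHIFT	30
#define	Ln_ADDR_MASK	0x1ffUL

static unsigned int
l1_index(uint64_t va)
{

	return ((va >> L1_SHIFT) & Ln_ADDR_MASK);
}

int
main(void)
{

	printf("VM_MIN_KERNEL_ADDRESS   -> L1[0x%03x]\n",
	    l1_index(0xffffffc000000000UL));	/* 0x100 */
	printf("DMAP_MIN_ADDRESS        -> L1[0x%03x]\n",
	    l1_index(0xffffffd000000000UL));	/* 0x140 */
	printf("VM_MAX_USER_ADDRESS - 1 -> L1[0x%03x]\n",
	    l1_index(0x0000003fffffffffUL));	/* 0x0ff */
	return (0);
}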

Sponsored by:	DARPA, AFRL
Sponsored by:	HEIF5
br 2016-04-25 14:47:51 +00:00
parent 03e30c2263
commit 7b9e9617a5
8 changed files with 140 additions and 76 deletions

sys/riscv/include/pcpu.h

@@ -46,9 +46,8 @@
#define PCPU_MD_FIELDS \
uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \
uint64_t pc_sptbr; /* L0 page table base (VA) */ \
uint64_t pc_reg; /* CPU MMIO base (PA) */ \
char __pad[109]
char __pad[117]
#ifdef _KERNEL

sys/riscv/include/pmap.h

@@ -74,12 +74,18 @@ struct pv_addr {
vm_paddr_t pv_pa;
};
/* An entry in the list of all pmaps */
struct pmap_list_entry {
SLIST_ENTRY(pmap_list_entry) pmap_link;
struct pmap *pmap;
};
struct pmap {
struct mtx pm_mtx;
struct pmap_statistics pm_stats; /* pmap statistics */
pd_entry_t *pm_l1;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
struct pmap_list_entry *p_entry; /* Place in the list of all pmaps */
};
typedef struct pv_entry {

sys/riscv/include/vmparam.h

@@ -153,12 +153,12 @@
#define VM_MAX_ADDRESS (0xffffffffffffffffUL)
/* 32 GiB of kernel addresses */
#define VM_MIN_KERNEL_ADDRESS (0xffffff8000000000UL)
#define VM_MAX_KERNEL_ADDRESS (0xffffff8800000000UL)
#define VM_MIN_KERNEL_ADDRESS (0xffffffc000000000UL)
#define VM_MAX_KERNEL_ADDRESS (0xffffffc800000000UL)
/* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */
#define DMAP_MIN_ADDRESS (0xffffffc000000000UL)
#define DMAP_MAX_ADDRESS (0xffffffdfffffffffUL)
#define DMAP_MIN_ADDRESS (0xffffffd000000000UL)
#define DMAP_MAX_ADDRESS (0xffffffefffffffffUL)
#define DMAP_MIN_PHYSADDR (0x0000000000000000UL)
#define DMAP_MAX_PHYSADDR (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS)
@@ -187,7 +187,7 @@
})
#define VM_MIN_USER_ADDRESS (0x0000000000000000UL)
#define VM_MAX_USER_ADDRESS (0x0000008000000000UL)
#define VM_MAX_USER_ADDRESS (0x0000004000000000UL)
#define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS)
#define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS)

sys/riscv/riscv/genassym.c

@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
ASSYM(KERNBASE, KERNBASE);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
@@ -75,7 +76,6 @@ ASSYM(PCB_A, offsetof(struct pcb, pcb_a));
ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
ASSYM(PC_SPTBR, offsetof(struct pcpu, pc_sptbr));
ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));

sys/riscv/riscv/locore.S

@@ -141,36 +141,31 @@ _start:
/* Page tables */
/* Level 0 */
la s1, pagetable_l0
la s2, pagetable_l1 /* Link to next level PN */
/* Create an L1 page for early devmap */
la s1, pagetable_l1
la s2, pagetable_l2_devmap /* Link to next level PN */
srli s2, s2, PAGE_SHIFT
li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE)
srli a5, a5, L1_SHIFT /* >> L1_SHIFT */
andi a5, a5, 0x1ff /* & 0x1ff */
li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S))
slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */
or t6, t4, t5
/* Store single level0 PTE entry to position */
li a5, 0x1ff
/* Store single level1 PTE entry to position */
li a6, PTE_SIZE
mulw a5, a5, a6
add t0, s1, a5
/* Store it to pagetable_l0 for each cpu */
li t1, MAXCPU
li t2, PAGE_SIZE
1:
sd t6, 0(t0)
add t0, t0, t2
addi t1, t1, -1
bnez t1, 1b
sd t6, (t0)
/* Level 1 */
/* Add single Level 1 entry for kernel */
la s1, pagetable_l1
la s2, pagetable_l2 /* Link to next level PN */
srli s2, s2, PAGE_SHIFT
li a5, KERNBASE
srli a5, a5, 0x1e /* >> 30 */
srli a5, a5, L1_SHIFT /* >> L1_SHIFT */
andi a5, a5, 0x1ff /* & 0x1ff */
li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S))
slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */
@@ -198,13 +193,13 @@ _start:
bltu t4, t3, 2b
/* Set page tables base register */
la s1, pagetable_l0
la s1, pagetable_l1
csrw sptbr, s1
/* Page tables END */
/* Enter supervisor mode */
li s0, ((MSTATUS_VM_SV48 << MSTATUS_VM_SHIFT) | \
li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \
(MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \
(MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \
(MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT));
@@ -290,14 +285,12 @@ szsigcode:
.quad esigcode - sigcode
.align 12
.globl pagetable_l0
pagetable_l0:
.space (PAGE_SIZE * MAXCPU)
pagetable_l1:
.space PAGE_SIZE
pagetable_l2:
.space PAGE_SIZE
pagetable_end:
pagetable_l2_devmap:
.space PAGE_SIZE
.globl init_pt_va
init_pt_va:
@@ -336,15 +329,11 @@ ENTRY(mpentry)
build_ring
/* Set page tables base register */
la t0, pagetable_l0
li t1, PAGE_SIZE
mulw t1, t1, a0
add t0, t0, t1
la t0, pagetable_l1
csrw sptbr, t0
/* Page tables END */
/* Configure mstatus */
li s0, ((MSTATUS_VM_SV48 << MSTATUS_VM_SHIFT) | \
li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \
(MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \
(MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \
(MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT));

sys/riscv/riscv/machdep.c

@@ -93,7 +93,6 @@ __FBSDID("$FreeBSD$");
#endif
struct pcpu __pcpu[MAXCPU];
extern uint64_t pagetable_l0;
static struct trapframe proc0_tf;
@@ -390,12 +389,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{
uint64_t addr;
addr = (uint64_t)&pagetable_l0;
addr += (cpuid * PAGE_SIZE);
pcpu->pc_sptbr = addr;
}
void

sys/riscv/riscv/pmap.c

@@ -207,6 +207,12 @@ __FBSDID("$FreeBSD$");
#define VM_PAGE_TO_PV_LIST_LOCK(m) \
PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
/* The list of all the pmaps */
static SLIST_HEAD(, pmap_list_entry) pmap_list =
SLIST_HEAD_INITIALIZER(pmap_list);
static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
struct pmap kernel_pmap_store;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
@@ -304,7 +310,8 @@ pmap_l2(pmap_t pmap, vm_offset_t va)
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
if (l1 == NULL)
return (NULL);
if ((pmap_load(l1) & PTE_VALID) == 0)
return (NULL);
if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
@@ -335,7 +342,7 @@ pmap_l3(pmap_t pmap, vm_offset_t va)
return (NULL);
if ((pmap_load(l2) & PTE_VALID) == 0)
return (NULL);
if (l2 == NULL || (pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S))
return (NULL);
return (pmap_l2_to_l3(l2, va));
@@ -405,6 +412,28 @@ pmap_resident_count_dec(pmap_t pmap, int count)
pmap->pm_stats.resident_count -= count;
}
static void
pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
pt_entry_t entry)
{
struct pmap_list_entry *p_entry;
struct pmap *user_pmap;
pd_entry_t *l1;
/* Distribute new kernel L1 entry to all the user pmaps */
if (pmap != kernel_pmap)
return;
SLIST_FOREACH(p_entry, &pmap_list, pmap_link) {
user_pmap = p_entry->pmap;
l1 = &user_pmap->pm_l1[l1index];
if (entry)
pmap_load_store(l1, entry);
else
pmap_load_clear(l1);
}
}
static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
u_int *l2_slot)
@@ -462,10 +491,9 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
/* superpages */
pn = ((pa >> L1_SHIFT) & Ln_ADDR_MASK);
pn = (pa / PAGE_SIZE);
entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
entry |= (pn << PTE_PPN2_S);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(&l1[l1_slot], entry);
}
@@ -473,6 +501,44 @@ pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
cpu_tlb_flushID();
}
static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
vm_offset_t l2pt, l3pt;
pt_entry_t entry;
pd_entry_t *l2;
vm_paddr_t pa;
u_int l2_slot;
pn_t pn;
KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
l2 = pmap_l2(kernel_pmap, va);
l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
l2pt = (vm_offset_t)l2;
l2_slot = pmap_l2_index(va);
l3pt = l3_start;
for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
pa = pmap_early_vtophys(l1pt, l3pt);
pn = (pa / PAGE_SIZE);
entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
entry |= (pn << PTE_PPN0_S);
pmap_load_store(&l2[l2_slot], entry);
l3pt += PAGE_SIZE;
}
/* Clean the L3 page tables */
memset((void *)l3_start, 0, l3pt - l3_start);
cpu_dcache_wb_range(l3_start, l3pt - l3_start);
cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
return l3pt;
}
/*
* Bootstrap the system enough to run with virtual memory.
*/
@@ -578,6 +644,10 @@ pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
freemempos = KERNBASE + kernlen;
freemempos = roundup2(freemempos, PAGE_SIZE);
/* Create the l3 tables for the early devmap */
freemempos = pmap_bootstrap_l3(l1pt,
VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
cpu_tlb_flushID();
#define alloc_pages(var, np) \
@@ -815,10 +885,10 @@ pmap_kextract(vm_offset_t va)
void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{
pt_entry_t entry;
pt_entry_t *l3;
vm_offset_t va;
panic("%s: implement me\n", __func__);
pn_t pn;
KASSERT((pa & L3_OFFSET) == 0,
("pmap_kenter_device: Invalid physical address"));
@@ -831,11 +901,12 @@ pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
while (size != 0) {
l3 = pmap_l3(kernel_pmap, va);
KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
panic("%s: unimplemented", __func__);
#if 0 /* implement me */
pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
#endif
pn = (pa / PAGE_SIZE);
entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l3, entry);
PTE_SYNC(l3);
va += PAGE_SIZE;
@@ -1037,6 +1108,7 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
pmap_load_clear(l1);
pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
PTE_SYNC(l1);
} else {
/* PTE page */
@@ -1105,6 +1177,7 @@ pmap_pinit0(pmap_t pmap)
int
pmap_pinit(pmap_t pmap)
{
struct pmap_list_entry *p_entry;
vm_paddr_t l1phys;
vm_page_t l1pt;
@@ -1123,6 +1196,16 @@ pmap_pinit(pmap_t pmap)
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
/* Install kernel pagetables */
memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);
p_entry = malloc(sizeof(struct pmap_list_entry), M_VMPMAP, M_WAITOK);
p_entry->pmap = pmap;
pmap->p_entry = p_entry;
/* Add to the list of all pmaps */
SLIST_INSERT_HEAD(&pmap_list, p_entry, pmap_link);
return (1);
}
@@ -1187,6 +1270,7 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(pmap, l1index, entry);
PTE_SYNC(l1);
@@ -1289,6 +1373,13 @@ pmap_release(pmap_t pmap)
m->wire_count--;
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
vm_page_free_zero(m);
/* Remove kernel pagetables */
bzero(pmap->pm_l1, PAGE_SIZE);
/* Remove pmap from the all pmaps list */
SLIST_REMOVE(&pmap_list, pmap->p_entry,
pmap_list_entry, pmap_link);
}
#if 0
@@ -1347,6 +1438,8 @@ pmap_growkernel(vm_offset_t addr)
entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(kernel_pmap,
pmap_l1_index(kernel_vm_end), entry);
PTE_SYNC(l1);
continue; /* try again */
@@ -2003,6 +2096,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
entry |= (l2_pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
PTE_SYNC(l1);
l2 = pmap_l1_to_l2(l1, va);
@@ -3085,18 +3179,13 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
void
pmap_activate(struct thread *td)
{
pt_entry_t entry;
pn_t pn;
pmap_t pmap;
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
pn = (td->td_pcb->pcb_l1addr / PAGE_SIZE);
entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S));
entry |= (pn << PTE_PPN0_S);
pmap_load_store((uint64_t *)PCPU_GET(sptbr), entry);
__asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr));
pmap_invalidate_all(pmap);
critical_exit();

sys/riscv/riscv/swtch.S

@@ -55,14 +55,8 @@ ENTRY(cpu_throw)
sfence.vm
/* Switch to the new pmap */
ld t1, PCB_L1ADDR(x13) /* Link to next level PN */
srli t1, t1, PAGE_SHIFT /* PN no */
li t2, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S))
slli t3, t1, PTE_PPN0_S /* (t1 << PTE_PPN0_S) */
or t4, t2, t3
/* Store single level0 PTE entry to position */
ld t0, PC_SPTBR(gp)
sd t4, 0(t0)
ld t0, PCB_L1ADDR(x13)
csrw sptbr, t0
/* TODO: Invalidate the TLB */
@@ -140,14 +134,8 @@ ENTRY(cpu_switch)
sfence.vm
/* Switch to the new pmap */
ld t1, PCB_L1ADDR(x13) /* Link to next level PN */
srli t1, t1, PAGE_SHIFT /* PN no */
li t2, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S))
slli t3, t1, PTE_PPN0_S /* (t1 << PTE_PPN0_S) */
or t4, t2, t3
/* Store single level0 PTE entry to position */
ld t0, PC_SPTBR(gp)
sd t4, 0(t0)
ld t0, PCB_L1ADDR(x13)
csrw sptbr, t0
/* TODO: Invalidate the TLB */