Optimize i386 pmap_extract_and_hold().

In particular, stop using pmap_pte() to read a non-promoted PTE while
walking the page table.  pmap_pte() needs to shoot down the kernel
mapping globally, which causes an IPI broadcast.  Since
pmap_extract_and_hold() is used for slow copyin(9), it is a very
significant hit for the 4/4 kernels.

Instead, create a single-purpose per-processor page frame and use it to
locally map the page table page inside a critical section, to avoid
reuse of the frame by another thread if a context switch occurs.

Measurements demonstrated very significant improvements in any load that
utilizes copyin/copyout.

Found and benchmarked by:	bde
Sponsored by:	The FreeBSD Foundation
This commit is contained in:
Konstantin Belousov 2018-05-25 16:29:22 +00:00
parent feac1d4808
commit ded29bd9a5
2 changed files with 19 additions and 5 deletions

View File

@ -692,6 +692,10 @@ pmap_init_reserved_pages(void)
pc->pc_copyout_saddr = kva_alloc(ptoa(2));
if (pc->pc_copyout_saddr == 0)
panic("unable to allocate sleepable copyout KVA");
pc->pc_pmap_eh_va = kva_alloc(ptoa(1));
if (pc->pc_pmap_eh_va == 0)
panic("unable to allocate pmap_extract_and_hold KVA");
pc->pc_pmap_eh_ptep = (char *)vtopte(pc->pc_pmap_eh_va);
/*
* Skip if the mappings have already been initialized,
@ -1598,8 +1602,8 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pd_entry_t pde;
pt_entry_t pte, *ptep;
pd_entry_t pde, newpf;
pt_entry_t *eh_ptep, pte, *ptep;
vm_page_t m;
vm_paddr_t pa;
@ -1619,9 +1623,17 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
vm_page_hold(m);
}
} else {
ptep = pmap_pte(pmap, va);
newpf = pde & PG_FRAME;
critical_enter();
eh_ptep = (pt_entry_t *)PCPU_GET(pmap_eh_ptep);
if ((*eh_ptep & PG_FRAME) != newpf) {
*eh_ptep = newpf | PG_RW | PG_V | PG_A | PG_M;
invlcaddr((void *)PCPU_GET(pmap_eh_va));
}
ptep = (pt_entry_t *)PCPU_GET(pmap_eh_va) +
(i386_btop(va) & (NPTEPG - 1));
pte = *ptep;
pmap_pte_release(ptep);
critical_exit();
if (pte != 0 &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,

View File

@ -76,9 +76,11 @@
struct mtx pc_copyout_mlock; \
struct sx pc_copyout_slock; \
char *pc_copyout_buf; \
vm_offset_t pc_pmap_eh_va; \
caddr_t pc_pmap_eh_ptep; \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
uint32_t pc_ibpb_set; \
char __pad[546]
char __pad[538]
#ifdef _KERNEL