Optimize i386 pmap_extract_and_hold().
In particular, stop using pmap_pte() to read non-promoted PTEs while
walking the page table.  pmap_pte() needs to shoot down the kernel
mapping globally, which causes an IPI broadcast.  Since
pmap_extract_and_hold() is used for the slow copyin(9) path, this is a
very significant hit for 4/4 kernels.

Instead, create a single-purpose per-processor page frame and use it to
locally map the page table page inside a critical section, so that the
frame cannot be reused by another thread if a context switch occurs.

Measurement demonstrated very significant improvements in any load that
utilizes copyin/copyout.

Found and benchmarked by:	bde
Sponsored by:	The FreeBSD Foundation
commit ded29bd9a5
parent feac1d4808
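Before the raw hunks, a minimal annotated sketch of the pattern this change
introduces may help.  It is illustrative only: the helper name
read_pte_via_percpu_window() and the idea of passing the already-looked-up
pde as a parameter are inventions for this sketch, and it assumes the usual
i386 pmap context where PG_FRAME, PG_RW, PG_V, PG_A, PG_M, NPTEPG,
i386_btop(), critical_enter()/critical_exit(), PCPU_GET() and invlcaddr()
are visible, plus the per-CPU fields pc_pmap_eh_va and pc_pmap_eh_ptep set
up in pmap_init_reserved_pages() below.

/*
 * Illustrative sketch (not part of the commit): read the pte for 'va'
 * through a dedicated per-CPU mapping window instead of pmap_pte(),
 * which would install a global kernel mapping and broadcast an IPI.
 */
static pt_entry_t
read_pte_via_percpu_window(pd_entry_t pde, vm_offset_t va)
{
	pd_entry_t newpf;
	pt_entry_t *eh_ptep, *ptep, pte;

	newpf = pde & PG_FRAME;		/* frame of the page table page */
	critical_enter();		/* no preemption, no CPU migration */
	eh_ptep = (pt_entry_t *)PCPU_GET(pmap_eh_ptep);
	if ((*eh_ptep & PG_FRAME) != newpf) {
		/* Point this CPU's window at the page table page. */
		*eh_ptep = newpf | PG_RW | PG_V | PG_A | PG_M;
		/* Flush only the local TLB entry; no IPI broadcast. */
		invlcaddr((void *)PCPU_GET(pmap_eh_va));
	}
	ptep = (pt_entry_t *)PCPU_GET(pmap_eh_va) +
	    (i386_btop(va) & (NPTEPG - 1));
	pte = *ptep;			/* read the non-promoted pte */
	critical_exit();
	return (pte);
}

The critical section is what makes a single window per CPU sufficient: while
it is held the thread cannot be preempted or migrated, so no other thread can
retarget the frame underneath it, and the cached-frame check skips the rewrite
and local invalidation when consecutive lookups hit the same page table page.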
@@ -692,6 +692,10 @@ pmap_init_reserved_pages(void)
 		pc->pc_copyout_saddr = kva_alloc(ptoa(2));
 		if (pc->pc_copyout_saddr == 0)
 			panic("unable to allocate sleepable copyout KVA");
+		pc->pc_pmap_eh_va = kva_alloc(ptoa(1));
+		if (pc->pc_pmap_eh_va == 0)
+			panic("unable to allocate pmap_extract_and_hold KVA");
+		pc->pc_pmap_eh_ptep = (char *)vtopte(pc->pc_pmap_eh_va);
 
 		/*
 		 * Skip if the mappings have already been initialized,
@@ -1598,8 +1602,8 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
-	pd_entry_t pde;
-	pt_entry_t pte, *ptep;
+	pd_entry_t pde, newpf;
+	pt_entry_t *eh_ptep, pte, *ptep;
 	vm_page_t m;
 	vm_paddr_t pa;
 
@@ -1619,9 +1623,17 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 			vm_page_hold(m);
 		}
 	} else {
-		ptep = pmap_pte(pmap, va);
+		newpf = pde & PG_FRAME;
+		critical_enter();
+		eh_ptep = (pt_entry_t *)PCPU_GET(pmap_eh_ptep);
+		if ((*eh_ptep & PG_FRAME) != newpf) {
+			*eh_ptep = newpf | PG_RW | PG_V | PG_A | PG_M;
+			invlcaddr((void *)PCPU_GET(pmap_eh_va));
+		}
+		ptep = (pt_entry_t *)PCPU_GET(pmap_eh_va) +
+		    (i386_btop(va) & (NPTEPG - 1));
 		pte = *ptep;
-		pmap_pte_release(ptep);
+		critical_exit();
 		if (pte != 0 &&
 		    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
 			if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
@@ -76,9 +76,11 @@
 	struct mtx	pc_copyout_mlock;				\
 	struct sx	pc_copyout_slock;				\
 	char		*pc_copyout_buf;				\
+	vm_offset_t	pc_pmap_eh_va;					\
+	caddr_t		pc_pmap_eh_ptep;				\
 	uint32_t	pc_smp_tlb_done;	/* TLB op acknowledgement */ \
 	uint32_t	pc_ibpb_set;					\
-	char		__pad[546]
+	char		__pad[538]
 
 #ifdef _KERNEL
 