Allocate per-cpu resources for doing pmap_zero_page() and pmap_copy_page().

This is a performance enhancement rather than a bugfix.
commit 24ca71778e
parent a2f5bf29cd
Author: Ian Lepore
Date:   2014-04-04 17:57:49 +00:00


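The diff below replaces the single global set of temporary-mapping resources (csrc_pte/cdst_pte, csrcp/cdstp, and the cmtx mutex) with one struct czpages per CPU, so page zero/copy operations on different CPUs no longer serialize on one lock. All three touched functions share the acquire/release pattern sketched here; the names come from the diff itself, but the snippet is an illustration of the pattern, not a verbatim excerpt:

/*
 * Per-CPU acquire/release pattern introduced by this change.
 * sched_pin() keeps the thread on its current CPU, so the cpuid used to
 * index cpu_czpages[] stays valid while the private pages are mapped.
 * The per-CPU mutex is still required: a thread can be preempted by
 * another thread on the same CPU in the middle of a copy or zero.
 */
struct czpages *czp;

sched_pin();				/* Forbid migration to another CPU. */
czp = &cpu_czpages[PCPU_GET(cpuid)];	/* This CPU's private window. */
mtx_lock(&czp->lock);			/* Exclude same-CPU threads. */
/* ... map page(s) at czp->srcva / czp->dstva and do the work ... */
mtx_unlock(&czp->lock);
sched_unpin();				/* Allow migration again. */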
@@ -265,9 +265,18 @@ vm_offset_t vm_max_kernel_address;
 
 struct pmap kernel_pmap_store;
 
-static pt_entry_t *csrc_pte, *cdst_pte;
-static vm_offset_t csrcp, cdstp;
-static struct mtx cmtx;
+/*
+ * Resources for quickly copying and zeroing pages using virtual address space
+ * and page table entries that are pre-allocated per-CPU by pmap_init().
+ */
+struct czpages {
+	struct mtx	lock;
+	pt_entry_t	*srcptep;
+	pt_entry_t	*dstptep;
+	vm_offset_t	srcva;
+	vm_offset_t	dstva;
+};
+static struct czpages cpu_czpages[MAXCPU];
 
 static void	pmap_init_l1(struct l1_ttable *, pd_entry_t *);
 
 /*
@@ -1803,13 +1812,14 @@ pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt)
 	struct l1_ttable *l1 = &static_l1;
 	struct l2_dtable *l2;
 	struct l2_bucket *l2b;
+	struct czpages *czp;
 	pd_entry_t pde;
 	pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
 	pt_entry_t *ptep;
 	vm_paddr_t pa;
 	vm_offset_t va;
 	vm_size_t size;
-	int l1idx, l2idx, l2next = 0;
+	int i, l1idx, l2idx, l2next = 0;
 
 	PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n",
 	    firstaddr, vm_max_kernel_address));
@@ -1921,13 +1931,16 @@ pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt)
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
-	 * mapping of pages.
+	 * mapping of pages that are being copied or zeroed.
 	 */
-	pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
-	pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
-	pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
-	pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
+	for (czp = cpu_czpages, i = 0; i < MAXCPU; ++i, ++czp) {
+		mtx_init(&czp->lock, "czpages", NULL, MTX_DEF);
+		pmap_alloc_specials(&virtual_avail, 1, &czp->srcva, &czp->srcptep);
+		pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->srcptep);
+		pmap_alloc_specials(&virtual_avail, 1, &czp->dstva, &czp->dstptep);
+		pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->dstptep);
+	}
 
 	size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) /
 	    L1_S_SIZE;
 	pmap_alloc_specials(&virtual_avail,
@@ -1955,7 +1968,6 @@ pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt)
 	virtual_avail = round_page(virtual_avail);
 	virtual_end = vm_max_kernel_address;
 	kernel_vm_end = pmap_curmaxkvaddr;
-	mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
 
 	pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
 }
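The reservation made by the bootstrap loop above costs kernel virtual address space only; no physical memory is pinned until a page is hooked in. A back-of-envelope figure, assuming 4 KiB pages (the numbers are illustrative, not from the commit):

/*
 * Illustrative cost of the per-CPU reservation: two single-page VA
 * windows (source and destination) per CPU slot.  With 4 KiB pages and
 * MAXCPU = 4, that is 4 * 2 * 4096 bytes = 32 KiB of kernel VA, plus
 * one PTE pointer pair and one mutex per slot; physical pages are only
 * mapped transiently while a copy or zero is in progress.
 */
vm_size_t czpages_kva = MAXCPU * 2 * PAGE_SIZE;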
@@ -4439,39 +4451,42 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 static void
 pmap_zero_page_gen(vm_page_t m, int off, int size)
 {
+	struct czpages *czp;
+
+	KASSERT(TAILQ_EMPTY(&m->md.pv_list),
+	    ("pmap_zero_page_gen: page has mappings"));
+
 	vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
-	if (!TAILQ_EMPTY(&m->md.pv_list))
-		panic("pmap_zero_page: page has mappings");
 
-	mtx_lock(&cmtx);
+	sched_pin();
+	czp = &cpu_czpages[PCPU_GET(cpuid)];
+	mtx_lock(&czp->lock);
+
 	/*
-	 * Hook in the page, zero it, invalidate the TLB as needed.
-	 *
-	 * Note the temporary zero-page mapping must be a non-cached page in
-	 * order to work without corruption when write-allocate is enabled.
+	 * Hook in the page, zero it.
 	 */
-	*cdst_pte = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
-	pmap_set_prot(cdst_pte, VM_PROT_WRITE, 0);
-	PTE_SYNC(cdst_pte);
-	cpu_tlb_flushD_SE(cdstp);
+	*czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
+	pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0);
+	PTE_SYNC(czp->dstptep);
+	cpu_tlb_flushD_SE(czp->dstva);
 	cpu_cpwait();
 	if (off || size != PAGE_SIZE)
-		bzero((void *)(cdstp + off), size);
+		bzero((void *)(czp->dstva + off), size);
 	else
-		bzero_page(cdstp);
+		bzero_page(czp->dstva);
+
 	/*
-	 * Although aliasing is not possible if we use
-	 * cdstp temporary mappings with memory that
-	 * will be mapped later as non-cached or with write-through
-	 * caches we might end up overwriting it when calling wbinv_all
-	 * So make sure caches are clean after copy operation
+	 * Although aliasing is not possible, if we use temporary mappings with
+	 * memory that will be mapped later as non-cached or with write-through
+	 * caches, we might end up overwriting it when calling wbinv_all. So
+	 * make sure caches are clean after the operation.
 	 */
-	cpu_idcache_wbinv_range(cdstp, size);
-	pmap_l2cache_wbinv_range(cdstp, phys, size);
+	cpu_idcache_wbinv_range(czp->dstva, size);
+	pmap_l2cache_wbinv_range(czp->dstva, phys, size);
 
-	mtx_unlock(&cmtx);
+	mtx_unlock(&czp->lock);
+	sched_unpin();
 }
 
 /*
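One behavioral detail in the hunk above: the unconditional check-and-panic becomes a KASSERT, which is compiled out of kernels built without options INVARIANTS, so release builds skip the check entirely. Side by side (both forms are taken from the diff):

/* Old: always executed, even in release kernels. */
if (!TAILQ_EMPTY(&m->md.pv_list))
	panic("pmap_zero_page: page has mappings");

/* New: active only under INVARIANTS; free otherwise. */
KASSERT(TAILQ_EMPTY(&m->md.pv_list),
    ("pmap_zero_page_gen: page has mappings"));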
@@ -4529,45 +4544,39 @@ pmap_zero_page_idle(vm_page_t m)
 void
 pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
 {
+	struct czpages *czp;
+
+	sched_pin();
+	czp = &cpu_czpages[PCPU_GET(cpuid)];
+	mtx_lock(&czp->lock);
+
 	/*
-	 * Hold the source page's lock for the duration of the copy
-	 * so that no other mappings can be created while we have a
-	 * potentially aliased mapping.
-	 * Map the pages into the page hook points, copy them, and purge
-	 * the cache for the appropriate page. Invalidate the TLB
-	 * as required.
+	 * Map the pages into the page hook points, copy them, and purge the
+	 * cache for the appropriate page.
 	 */
-	mtx_lock(&cmtx);
-
-	/* For ARMv6 using System bit is deprecated and mapping with AP
-	 * bits set to 0x0 makes page not accessible. csrc_pte is mapped
-	 * read/write until proper mapping defines are created for ARMv6.
-	 */
-	*csrc_pte = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF;
-	pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
-	PTE_SYNC(csrc_pte);
-	*cdst_pte = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF;
-	pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
-	PTE_SYNC(cdst_pte);
-	cpu_tlb_flushD_SE(csrcp);
-	cpu_tlb_flushD_SE(cdstp);
+	*czp->srcptep = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF;
+	pmap_set_prot(czp->srcptep, VM_PROT_READ, 0);
+	PTE_SYNC(czp->srcptep);
+	cpu_tlb_flushD_SE(czp->srcva);
+	*czp->dstptep = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF;
+	pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0);
+	PTE_SYNC(czp->dstptep);
+	cpu_tlb_flushD_SE(czp->dstva);
 	cpu_cpwait();
 
+	bcopy_page(czp->srcva, czp->dstva);
+
 	/*
-	 * Although aliasing is not possible if we use
-	 * cdstp temporary mappings with memory that
-	 * will be mapped later as non-cached or with write-through
-	 * caches we might end up overwriting it when calling wbinv_all
-	 * So make sure caches are clean after copy operation
+	 * Although aliasing is not possible, if we use temporary mappings with
+	 * memory that will be mapped later as non-cached or with write-through
+	 * caches, we might end up overwriting it when calling wbinv_all. So
+	 * make sure caches are clean after the operation.
 	 */
-	bcopy_page(csrcp, cdstp);
+	cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE);
+	pmap_l2cache_wbinv_range(czp->dstva, dst, PAGE_SIZE);
 
-	cpu_idcache_wbinv_range(cdstp, PAGE_SIZE);
-	pmap_l2cache_wbinv_range(cdstp, dst, PAGE_SIZE);
-
-	mtx_unlock(&cmtx);
+	mtx_unlock(&czp->lock);
+	sched_unpin();
 }
 
 int unmapped_buf_allowed = 1;
@@ -4579,8 +4588,12 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
 	vm_page_t a_pg, b_pg;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	int cnt;
+	struct czpages *czp;
+
+	sched_pin();
+	czp = &cpu_czpages[PCPU_GET(cpuid)];
+	mtx_lock(&czp->lock);
 
-	mtx_lock(&cmtx);
 	while (xfersize > 0) {
 		a_pg = ma[a_offset >> PAGE_SHIFT];
 		a_pg_offset = a_offset & PAGE_MASK;
@@ -4588,27 +4601,29 @@ pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
 		b_pg = mb[b_offset >> PAGE_SHIFT];
 		b_pg_offset = b_offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-		*csrc_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) |
+		*czp->srcptep = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) |
 		    pte_l2_s_cache_mode | L2_S_REF;
-		pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
-		PTE_SYNC(csrc_pte);
-		*cdst_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) |
+		pmap_set_prot(czp->srcptep, VM_PROT_READ, 0);
+		PTE_SYNC(czp->srcptep);
+		cpu_tlb_flushD_SE(czp->srcva);
+		*czp->dstptep = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) |
 		    pte_l2_s_cache_mode | L2_S_REF;
-		pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
-		PTE_SYNC(cdst_pte);
-		cpu_tlb_flushD_SE(csrcp);
-		cpu_tlb_flushD_SE(cdstp);
+		pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0);
+		PTE_SYNC(czp->dstptep);
+		cpu_tlb_flushD_SE(czp->dstva);
 		cpu_cpwait();
-		bcopy((char *)csrcp + a_pg_offset, (char *)cdstp + b_pg_offset,
+		bcopy((char *)czp->srcva + a_pg_offset, (char *)czp->dstva + b_pg_offset,
 		    cnt);
-		cpu_idcache_wbinv_range(cdstp + b_pg_offset, cnt);
-		pmap_l2cache_wbinv_range(cdstp + b_pg_offset,
+		cpu_idcache_wbinv_range(czp->dstva + b_pg_offset, cnt);
+		pmap_l2cache_wbinv_range(czp->dstva + b_pg_offset,
 		    VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt);
 		xfersize -= cnt;
 		a_offset += cnt;
 		b_offset += cnt;
 	}
-	mtx_unlock(&cmtx);
+	mtx_unlock(&czp->lock);
+	sched_unpin();
 }
 
 void
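For reference, every temporary mapping in the three functions above follows the same fixed sequence. The sketch below summarizes it, with phys standing for the physical address being hooked in; the order of operations is taken from the diff, while the snippet itself is a summary rather than an excerpt:

/*
 * Canonical temporary-mapping sequence used by the functions above.
 */
*czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0);	/* Set access permissions. */
PTE_SYNC(czp->dstptep);				/* Push the PTE to memory. */
cpu_tlb_flushD_SE(czp->dstva);			/* Drop any stale TLB entry. */
cpu_cpwait();					/* Wait for completion. */
/* ... access the page through czp->dstva ... */
cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE);	/* Clean/invalidate L1. */
pmap_l2cache_wbinv_range(czp->dstva, phys, PAGE_SIZE);	/* And L2. */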