For CPUs which ignore TD_CV and support hardware unaliasing, don't bother
doing page coloring. This results in a small but measurable performance
improvement in buildworld times.
This commit is contained in:
Marius Strobl 2010-08-08 00:01:08 +00:00
parent a0bca95502
commit 820a9ea5cb
6 changed files with 80 additions and 60 deletions

View File

@ -49,7 +49,7 @@
#define DCACHE_COLORS (1 << DCACHE_COLOR_BITS)
#define DCACHE_COLOR_MASK (DCACHE_COLORS - 1)
#define DCACHE_COLOR(va) (((va) >> PAGE_SHIFT) & DCACHE_COLOR_MASK)
#define DCACHE_OTHER_COLOR(color) \
#define DCACHE_OTHER_COLOR(color) \
((color) ^ DCACHE_COLOR_BITS)
#define DC_TAG_SHIFT 2
@ -89,6 +89,8 @@ struct cacheinfo {
#ifdef _KERNEL
extern u_int dcache_color_ignore;
struct pcpu;
typedef void cache_enable_t(u_int cpu_impl);

View File

@ -88,6 +88,8 @@ cache_flush_t *cache_flush;
dcache_page_inval_t *dcache_page_inval;
icache_page_inval_t *icache_page_inval;
u_int dcache_color_ignore;
#define OF_GET(h, n, v) OF_getprop((h), (n), &(v), sizeof(v))
static u_int cache_new_prop(u_int cpu_impl);
@ -114,6 +116,13 @@ cache_init(struct pcpu *pcpu)
u_long set;
u_int use_new_prop;
/*
* For CPUs which ignore TD_CV and support hardware unaliasing don't
* bother doing page coloring. This is equal across all CPUs.
*/
if (pcpu->pc_cpuid == 0 && pcpu->pc_impl == CPU_IMPL_SPARC64V)
dcache_color_ignore = 1;
use_new_prop = cache_new_prop(pcpu->pc_impl);
if (OF_GET(pcpu->pc_node, !use_new_prop ? "icache-size" :
"l1-icache-size", pcpu->pc_cache.ic_size) == -1 ||
@ -145,9 +154,8 @@ cache_init(struct pcpu *pcpu)
* For CPUs which don't support unaliasing in hardware ensure that
* the data cache doesn't have too many virtual colors.
*/
if (pcpu->pc_impl != CPU_IMPL_SPARC64V &&
((pcpu->pc_cache.dc_size / pcpu->pc_cache.dc_assoc) /
PAGE_SIZE) != DCACHE_COLORS)
if (dcache_color_ignore == 0 && ((pcpu->pc_cache.dc_size /
pcpu->pc_cache.dc_assoc) / PAGE_SIZE) != DCACHE_COLORS)
panic("cache_init: too many D$ colors");
set = pcpu->pc_cache.ec_size / pcpu->pc_cache.ec_assoc;
if ((set & ~(1UL << (ffs(set) - 1))) != 0)

View File

@ -95,8 +95,10 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
vm_page_t m;
int error;
int i;
uint32_t colors;
cnt = 0;
colors = 1;
error = 0;
ova = 0;
@ -134,20 +136,20 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
}
if (m != NULL) {
if (ova == 0) {
#ifndef SUN4V
if (ova == 0)
if (dcache_color_ignore == 0)
colors = DCACHE_COLORS;
#endif
ova = kmem_alloc_wait(kernel_map,
PAGE_SIZE * DCACHE_COLORS);
if (m->md.color != -1)
PAGE_SIZE * colors);
}
#ifndef SUN4V
if (colors != 1 && m->md.color != -1)
va = ova + m->md.color * PAGE_SIZE;
else
va = ova;
#else
if (ova == 0)
ova = kmem_alloc_wait(kernel_map,
PAGE_SIZE);
va = ova;
#endif
va = ova;
pmap_qenter(va, &m, 1);
error = uiomove((void *)(va + off), cnt,
uio);
@ -158,8 +160,7 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
uio);
}
break;
}
else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
} else if (dev2unit(dev) == CDEV_MINOR_KMEM) {
va = trunc_page(uio->uio_offset);
eva = round_page(uio->uio_offset + iov->iov_len);
@ -184,15 +185,12 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
/* else panic! */
}
if (ova != 0)
#ifndef SUN4V
kmem_free_wakeup(kernel_map, ova, PAGE_SIZE * DCACHE_COLORS);
#else
kmem_free_wakeup(kernel_map, ova, PAGE_SIZE);
#endif
kmem_free_wakeup(kernel_map, ova, PAGE_SIZE * colors);
return (error);
}
/*
 * Initialization hook for the memory device.  The body is empty, so no
 * setup is performed here.
 * NOTE(review): presumably the machine-dependent counterpart invoked by
 * the generic mem driver -- confirm against the caller.
 */
void
dev_mem_md_init(void)
{
}

View File

@ -154,7 +154,7 @@ struct pmap kernel_pmap_store;
/*
* Allocate physical memory for use in pmap_bootstrap.
*/
static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size);
static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
/*
* Map the given physical page at the specified virtual address in the
@ -308,6 +308,9 @@ pmap_bootstrap(u_int cpu_impl)
int i;
int j;
int sz;
uint32_t colors;
colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
/*
* Find out what physical memory is available from the PROM and
@ -379,7 +382,7 @@ pmap_bootstrap(u_int cpu_impl)
/*
* Allocate the kernel TSB and lock it in the TLB.
*/
pa = pmap_bootstrap_alloc(tsb_kernel_size);
pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
if (pa & PAGE_MASK_4M)
panic("pmap_bootstrap: tsb unaligned\n");
tsb_kernel_phys = pa;
@ -390,13 +393,13 @@ pmap_bootstrap(u_int cpu_impl)
/*
* Allocate and map the dynamic per-CPU area for the BSP.
*/
pa = pmap_bootstrap_alloc(DPCPU_SIZE);
pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
/*
* Allocate and map the message buffer.
*/
pa = pmap_bootstrap_alloc(MSGBUF_SIZE);
pa = pmap_bootstrap_alloc(MSGBUF_SIZE, colors);
msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
/*
@ -458,26 +461,26 @@ pmap_bootstrap(u_int cpu_impl)
* Allocate kva space for temporary mappings.
*/
pmap_idle_map = virtual_avail;
virtual_avail += PAGE_SIZE * DCACHE_COLORS;
virtual_avail += PAGE_SIZE * colors;
pmap_temp_map_1 = virtual_avail;
virtual_avail += PAGE_SIZE * DCACHE_COLORS;
virtual_avail += PAGE_SIZE * colors;
pmap_temp_map_2 = virtual_avail;
virtual_avail += PAGE_SIZE * DCACHE_COLORS;
virtual_avail += PAGE_SIZE * colors;
/*
* Allocate a kernel stack with guard page for thread0 and map it
* into the kernel TSB. We must ensure that the virtual address is
* coloured properly, since we're allocating from phys_avail so the
* memory won't have an associated vm_page_t.
* colored properly for corresponding CPUs, since we're allocating
* from phys_avail so the memory won't have an associated vm_page_t.
*/
pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE);
pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
kstack0_phys = pa;
virtual_avail += roundup(KSTACK_GUARD_PAGES, DCACHE_COLORS) *
PAGE_SIZE;
virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
kstack0 = virtual_avail;
virtual_avail += roundup(KSTACK_PAGES, DCACHE_COLORS) * PAGE_SIZE;
KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
("pmap_bootstrap: kstack0 miscoloured"));
virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
if (dcache_color_ignore == 0)
KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
("pmap_bootstrap: kstack0 miscolored"));
for (i = 0; i < KSTACK_PAGES; i++) {
pa = kstack0_phys + i * PAGE_SIZE;
va = kstack0 + i * PAGE_SIZE;
@ -609,12 +612,12 @@ pmap_map_tsb(void)
* calculated.
*/
static vm_paddr_t
pmap_bootstrap_alloc(vm_size_t size)
pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
{
vm_paddr_t pa;
int i;
size = roundup(size, PAGE_SIZE * DCACHE_COLORS);
size = roundup(size, PAGE_SIZE * colors);
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
if (phys_avail[i + 1] - phys_avail[i] < size)
continue;
@ -755,6 +758,9 @@ pmap_cache_enter(vm_page_t m, vm_offset_t va)
("pmap_cache_enter: fake page"));
PMAP_STATS_INC(pmap_ncache_enter);
if (dcache_color_ignore != 0)
return (1);
/*
* Find the color for this virtual address and note the added mapping.
*/
@ -832,6 +838,9 @@ pmap_cache_remove(vm_page_t m, vm_offset_t va)
m->md.colors[DCACHE_COLOR(va)]));
PMAP_STATS_INC(pmap_ncache_remove);
if (dcache_color_ignore != 0)
return;
/*
* Find the color for this virtual address and note the removal of
* the mapping.
@ -900,7 +909,7 @@ pmap_kenter(vm_offset_t va, vm_page_t m)
va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
CTR5(KTR_SPARE2,
"pmap_kenter: off colour va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
va, VM_PAGE_TO_PHYS(m), m->object,
m->object ? m->object->type : -1,
m->pindex);
@ -1614,13 +1623,13 @@ pmap_zero_page(vm_page_t m)
("pmap_zero_page: fake page"));
PMAP_STATS_INC(pmap_nzero_page);
pa = VM_PAGE_TO_PHYS(m);
if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_nc);
aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
} else if (m->md.color == DCACHE_COLOR(pa)) {
if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
PMAP_STATS_INC(pmap_nzero_page_c);
va = TLB_PHYS_TO_DIRECT(pa);
cpu_block_zero((void *)va, PAGE_SIZE);
} else if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_nc);
aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
} else {
PMAP_STATS_INC(pmap_nzero_page_oc);
PMAP_LOCK(kernel_pmap);
@ -1646,13 +1655,13 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
PMAP_STATS_INC(pmap_nzero_page_area);
pa = VM_PAGE_TO_PHYS(m);
if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_area_nc);
aszero(ASI_PHYS_USE_EC, pa + off, size);
} else if (m->md.color == DCACHE_COLOR(pa)) {
if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
PMAP_STATS_INC(pmap_nzero_page_area_c);
va = TLB_PHYS_TO_DIRECT(pa);
bzero((void *)(va + off), size);
} else if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_area_nc);
aszero(ASI_PHYS_USE_EC, pa + off, size);
} else {
PMAP_STATS_INC(pmap_nzero_page_area_oc);
PMAP_LOCK(kernel_pmap);
@ -1677,13 +1686,13 @@ pmap_zero_page_idle(vm_page_t m)
("pmap_zero_page_idle: fake page"));
PMAP_STATS_INC(pmap_nzero_page_idle);
pa = VM_PAGE_TO_PHYS(m);
if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_idle_nc);
aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
} else if (m->md.color == DCACHE_COLOR(pa)) {
if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
PMAP_STATS_INC(pmap_nzero_page_idle_c);
va = TLB_PHYS_TO_DIRECT(pa);
cpu_block_zero((void *)va, PAGE_SIZE);
} else if (m->md.color == -1) {
PMAP_STATS_INC(pmap_nzero_page_idle_nc);
aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
} else {
PMAP_STATS_INC(pmap_nzero_page_idle_oc);
va = pmap_idle_map + (m->md.color * PAGE_SIZE);
@ -1711,15 +1720,16 @@ pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
PMAP_STATS_INC(pmap_ncopy_page);
pdst = VM_PAGE_TO_PHYS(mdst);
psrc = VM_PAGE_TO_PHYS(msrc);
if (msrc->md.color == -1 && mdst->md.color == -1) {
PMAP_STATS_INC(pmap_ncopy_page_nc);
ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
} else if (msrc->md.color == DCACHE_COLOR(psrc) &&
mdst->md.color == DCACHE_COLOR(pdst)) {
if (dcache_color_ignore != 0 ||
(msrc->md.color == DCACHE_COLOR(psrc) &&
mdst->md.color == DCACHE_COLOR(pdst))) {
PMAP_STATS_INC(pmap_ncopy_page_c);
vdst = TLB_PHYS_TO_DIRECT(pdst);
vsrc = TLB_PHYS_TO_DIRECT(psrc);
cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
} else if (msrc->md.color == -1 && mdst->md.color == -1) {
PMAP_STATS_INC(pmap_ncopy_page_nc);
ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
} else if (msrc->md.color == -1) {
if (mdst->md.color == DCACHE_COLOR(pdst)) {
PMAP_STATS_INC(pmap_ncopy_page_dc);

View File

@ -51,12 +51,13 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_param.h>
#include <machine/cache.h>
#include <machine/tlb.h>
/*
* Implement uiomove(9) from physical memory using a combination
* of the direct mapping and sf_bufs to reduce the creation and
* destruction of ephemeral mappings.
* destruction of ephemeral mappings.
*/
int
uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
@ -92,7 +93,8 @@ uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio)
cnt = ulmin(cnt, PAGE_SIZE - page_offset);
m = ma[offset >> PAGE_SHIFT];
pa = VM_PAGE_TO_PHYS(m);
if (m->md.color != DCACHE_COLOR(pa)) {
if (dcache_color_ignore == 0 &&
m->md.color != DCACHE_COLOR(pa)) {
sf = sf_buf_alloc(m, 0);
cp = (char *)sf_buf_kva(sf) + page_offset;
} else {

View File

@ -227,7 +227,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
void
cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
stack_t *stack)
stack_t *stack)
{
struct trapframe *tf;
uint64_t sp;
@ -251,7 +251,7 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
if (td == curthread)
flushw();
td->td_frame->tf_global[7] = (uint64_t) tls_base;
td->td_frame->tf_global[7] = (uint64_t)tls_base;
return (0);
}
@ -531,7 +531,7 @@ uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
}
pa = VM_PAGE_TO_PHYS(m);
if (m->md.color != DCACHE_COLOR(pa)) {
if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
("uma_small_alloc: free page still has mappings!"));
PMAP_STATS_INC(uma_nsmall_alloc_oc);