Replace the SLB backing store splay tree used on 64-bit PowerPC AIM
hardware with a lockless sparse tree design. This marginally improves
the performance of PMAP and allows copyin()/copyout() to run without
acquiring locks when used on wired mappings.

Submitted by:	mdf
Nathan Whitehorn 2010-09-16 00:22:25 +00:00
parent b3f7949dc5
commit 95fa3335e1
6 changed files with 370 additions and 166 deletions
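
For orientation before the diff: the new backing store is a sparse 16-way tree indexed by 4-bit nibbles of the ESID, shallow enough that readers can walk it with plain loads. The toy model below only illustrates that lookup shape; its names and types are invented here and validity checks are omitted. The real node layout and walk are in the slb.c hunk further down.

#include <stddef.h>
#include <stdint.h>

#define TOY_LEAF 0

struct toy_node {
	int              level;      /* TOY_LEAF at the bottom, 8 at the root */
	uint64_t         base;       /* ESID prefix covered by this node      */
	struct toy_node *child[16];  /* interior nodes: one slot per nibble   */
	uint64_t         value[16];  /* leaves: the stored entries            */
};

/* Lock-free lookup: consume one 4-bit nibble of the ESID per level. */
static uint64_t
toy_lookup(const struct toy_node *n, uint64_t esid)
{
	int idx;

	for (;;) {
		idx = (int)((esid >> (4 * n->level)) & 0xF);
		if (n->level == TOY_LEAF)
			return (n->value[idx]);
		n = n->child[idx];
		/* Missing child or mismatched prefix: nothing mapped here. */
		if (n == NULL ||
		    (esid & ~((1ULL << (4 * (n->level + 1))) - 1)) != n->base)
			return (0);
	}
}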


@@ -80,16 +80,28 @@ int setfault(faultbuf); /* defined in locore.S */
static __inline void
set_user_sr(pmap_t pm, const void *addr)
{
struct slb *slb;
register_t esid, vsid, slb1, slb2;
esid = USER_ADDR >> ADDR_SR_SHFT;
PMAP_LOCK(pm);
vsid = va_to_vsid(pm, (vm_offset_t)addr);
PMAP_UNLOCK(pm);
/* Try lockless look-up first */
slb = user_va_to_slb_entry(pm, (vm_offset_t)addr);
if (slb == NULL) {
/* If it isn't there, we need to pre-fault the VSID */
PMAP_LOCK(pm);
vsid = va_to_vsid(pm, (vm_offset_t)addr);
PMAP_UNLOCK(pm);
} else {
vsid = slb->slbv >> SLBV_VSID_SHIFT;
}
slb1 = vsid << SLBV_VSID_SHIFT;
slb2 = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | USER_SR;
curthread->td_pcb->pcb_cpu.aim.usr_segm =
(uintptr_t)addr >> ADDR_SR_SHFT;
__asm __volatile ("slbie %0; slbmte %1, %2" :: "r"(esid << 28),
"r"(slb1), "r"(slb2));
isync();


@@ -2097,7 +2097,7 @@ moea64_pinit(mmu_t mmu, pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
SPLAY_INIT(&pmap->pm_slbtree);
pmap->pm_slb_tree_root = slb_alloc_tree();
pmap->pm_slb = slb_alloc_user_cache();
}
#else
@@ -2252,7 +2252,7 @@ moea64_release(mmu_t mmu, pmap_t pmap)
* Free segment registers' VSIDs
*/
#ifdef __powerpc64__
free_vsids(pmap);
slb_free_tree(pmap);
slb_free_user_cache(pmap->pm_slb);
#else
KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
@@ -2622,18 +2622,25 @@ moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
int ptegidx;
uint64_t vsid;
#ifdef __powerpc64__
struct slb slb;
uint64_t slbv;
/* The page is not mapped if the segment isn't */
if (va_to_slb_entry(pm, va, &slb) != 0)
return NULL;
if (pm == kernel_pmap) {
slbv = kernel_va_to_slbv(va);
} else {
struct slb *slb;
slb = user_va_to_slb_entry(pm, va);
/* The page is not mapped if the segment isn't */
if (slb == NULL)
return NULL;
slbv = slb->slbv;
}
vsid = (slb.slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
if (slb.slbv & SLBV_L)
vsid = (slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
if (slbv & SLBV_L)
va &= ~moea64_large_page_mask;
else
va &= ~ADDR_POFF;
ptegidx = va_to_pteg(vsid, va, slb.slbv & SLBV_L);
ptegidx = va_to_pteg(vsid, va, slbv & SLBV_L);
#else
va &= ~ADDR_POFF;
vsid = va_to_vsid(pm, va);


@@ -32,7 +32,6 @@
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/tree.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -45,65 +44,212 @@
uintptr_t moea64_get_unique_vsid(void);
void moea64_release_vsid(uint64_t vsid);
struct slbcontainer {
struct slb slb;
SPLAY_ENTRY(slbcontainer) slb_node;
};
static int slb_compare(struct slbcontainer *a, struct slbcontainer *b);
static void slb_zone_init(void *);
SPLAY_PROTOTYPE(slb_tree, slbcontainer, slb_node, slb_compare);
SPLAY_GENERATE(slb_tree, slbcontainer, slb_node, slb_compare);
uma_zone_t slb_zone;
uma_zone_t slbt_zone;
uma_zone_t slb_cache_zone;
SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);
int
va_to_slb_entry(pmap_t pm, vm_offset_t va, struct slb *slb)
struct slbtnode {
uint16_t ua_alloc;
uint8_t ua_level;
/* Only 36 bits needed for full 64-bit address space. */
uint64_t ua_base;
union {
struct slbtnode *ua_child[16];
struct slb slb_entries[16];
} u;
};
/*
* For a full 64-bit address space, there are 36 bits in play in an
* esid, so 8 levels, with the leaf being at level 0.
*
* |3333|3322|2222|2222|1111|1111|11 | | | esid
* |5432|1098|7654|3210|9876|5432|1098|7654|3210| bits
* +----+----+----+----+----+----+----+----+----+--------
* | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | level
*/
#define UAD_ROOT_LEVEL 8
#define UAD_LEAF_LEVEL 0
static inline int
esid2idx(uint64_t esid, int level)
{
struct slbcontainer cont, *found;
uint64_t esid;
int shift;
shift = level * 4;
return ((esid >> shift) & 0xF);
}
/*
* The ua_base field should have 0 bits after the first 4*(level+1)
* bits; i.e. only the bits that select this node's subtree may be set.
*/
#define uad_baseok(ua) \
(esid2base(ua->ua_base, ua->ua_level) == ua->ua_base)
static inline uint64_t
esid2base(uint64_t esid, int level)
{
uint64_t mask;
int shift;
shift = (level + 1) * 4;
mask = ~((1ULL << shift) - 1);
return (esid & mask);
}
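To make the index arithmetic concrete, here is a worked example using the two helpers just defined; the ESID value is made up purely for illustration, and the sketch assumes the kernel environment of this file (KASSERT is already used throughout it).

/*
 * Worked example: one nibble of the ESID is consumed per level,
 * least-significant nibble at the leaf.
 */
static void
esid_math_example(void)
{
	uint64_t esid = 0x123456789ULL;	/* a 36-bit ESID */

	KASSERT(esid2idx(esid, 0) == 0x9, ("leaf-level slot"));
	KASSERT(esid2idx(esid, 2) == 0x7, ("level-2 slot"));
	KASSERT(esid2idx(esid, 8) == 0x1, ("root-level slot"));

	/* esid2base() clears the nibbles at and below the given level. */
	KASSERT(esid2base(esid, 0) == 0x123456780ULL, ("level-0 base"));
	KASSERT(esid2base(esid, 2) == 0x123456000ULL, ("level-2 base"));
	KASSERT(esid2base(esid, 8) == 0, ("the root covers everything"));
}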
/*
* Allocate a new leaf node for the specified esid/vmhandle from the
* parent node.
*/
static struct slb *
make_new_leaf(uint64_t esid, uint64_t slbv, struct slbtnode *parent)
{
struct slbtnode *child;
struct slb *retval;
int idx;
idx = esid2idx(esid, parent->ua_level);
KASSERT(parent->u.ua_child[idx] == NULL, ("Child already exists!"));
/* unlock and M_WAITOK and loop? */
child = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
KASSERT(child != NULL, ("unhandled NULL case"));
child->ua_level = UAD_LEAF_LEVEL;
child->ua_base = esid2base(esid, child->ua_level);
idx = esid2idx(esid, child->ua_level);
child->u.slb_entries[idx].slbv = slbv;
child->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
setbit(&child->ua_alloc, idx);
retval = &child->u.slb_entries[idx];
/*
* The above stores must be visible before the next one, so
* that a lockless searcher always sees a valid path through
* the tree.
*/
powerpc_sync();
idx = esid2idx(esid, parent->ua_level);
parent->u.ua_child[idx] = child;
setbit(&parent->ua_alloc, idx);
return (retval);
}
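The barrier comment above is the writer half of the lockless protocol: build the leaf completely, force those stores to be visible, and only then link it into the parent. The reader half (user_va_to_slb_entry() below) simply follows the child pointer with ordinary loads. A condensed sketch of that pairing follows; the helper names are invented here for illustration and are not part of the commit.

/* Writer side: callers hold PMAP_LOCK, so only readers are lock-free. */
static void
publish_child(struct slbtnode *parent, struct slbtnode *child, int idx)
{
	/* The child must already be fully initialized by the caller. */
	powerpc_sync();			 /* order initialization before the link */
	parent->u.ua_child[idx] = child; /* a single aligned store publishes it  */
}

/* Reader side: sees either NULL or a completely built child. */
static struct slbtnode *
follow_child(struct slbtnode *parent, int idx)
{
	return (parent->u.ua_child[idx]);
}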
/*
* Allocate a new intermediate node to fit between the parent and
* esid.
*/
static struct slbtnode*
make_intermediate(uint64_t esid, struct slbtnode *parent)
{
struct slbtnode *child, *inter;
int idx, level;
idx = esid2idx(esid, parent->ua_level);
child = parent->u.ua_child[idx];
KASSERT(esid2base(esid, child->ua_level) != child->ua_base,
("No need for an intermediate node?"));
/*
* Find the level where the existing child and our new esid
* meet. It must be lower than parent->ua_level or we would
* have chosen a different index in parent.
*/
level = child->ua_level + 1;
while (esid2base(esid, level) !=
esid2base(child->ua_base, level))
level++;
KASSERT(level < parent->ua_level,
("Found splitting level %d for %09jx and %09jx, "
"but it's the same as %p's",
level, esid, child->ua_base, parent));
/* unlock and M_WAITOK and loop? */
inter = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
KASSERT(inter != NULL, ("unhandled NULL case"));
/* Set up intermediate node to point to child ... */
inter->ua_level = level;
inter->ua_base = esid2base(esid, inter->ua_level);
idx = esid2idx(child->ua_base, inter->ua_level);
inter->u.ua_child[idx] = child;
setbit(&inter->ua_alloc, idx);
powerpc_sync();
/* Set up parent to point to intermediate node ... */
idx = esid2idx(inter->ua_base, parent->ua_level);
parent->u.ua_child[idx] = inter;
setbit(&parent->ua_alloc, idx);
return (inter);
}
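A concrete case may help with the splitting-level loop above; the values are made up for illustration and the sketch again leans on KASSERT and the helpers defined earlier in this file.

/*
 * Suppose the tree holds one leaf (level 0) with ua_base 0x123456780 and a
 * mapping for ESID 0x123456abc must be inserted.
 */
static void
split_level_example(void)
{
	uint64_t child_base = 0x123456780ULL;	/* existing leaf, level 0 */
	uint64_t esid       = 0x123456abcULL;	/* new mapping to insert  */
	int level;

	/* Walk up from just above the child until the two prefixes agree. */
	for (level = 0 + 1; esid2base(esid, level) !=
	    esid2base(child_base, level); level++)
		;

	KASSERT(level == 2, ("both bases become 0x123456000 at level 2"));
	KASSERT(esid2idx(child_base, level) == 0x7, ("old leaf's slot"));
	KASSERT(esid2idx(esid, level) == 0xa, ("new leaf's slot"));
}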
uint64_t
kernel_va_to_slbv(vm_offset_t va)
{
uint64_t esid, slbv;
esid = (uintptr_t)va >> ADDR_SR_SHFT;
slb->slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
if (pm == kernel_pmap) {
/* Set kernel VSID to deterministic value */
slb->slbv = va_to_vsid(kernel_pmap, va) << SLBV_VSID_SHIFT;
/* Set kernel VSID to deterministic value */
slbv = va_to_vsid(kernel_pmap, va) << SLBV_VSID_SHIFT;
/* Figure out if this is a large-page mapping */
if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
/*
* XXX: If we have set up a direct map, assumes
* all physical memory is mapped with large pages.
*/
if (mem_valid(va, 0) == 0)
slb->slbv |= SLBV_L;
}
return (0);
/* Figure out if this is a large-page mapping */
if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
/*
* XXX: If we have set up a direct map, assumes
* all physical memory is mapped with large pages.
*/
if (mem_valid(va, 0) == 0)
slbv |= SLBV_L;
}
return (slbv);
}
struct slb *
user_va_to_slb_entry(pmap_t pm, vm_offset_t va)
{
uint64_t esid = va >> ADDR_SR_SHFT;
struct slbtnode *ua;
int idx;
ua = pm->pm_slb_tree_root;
for (;;) {
KASSERT(uad_baseok(ua), ("uad base %016jx level %d bad!",
ua->ua_base, ua->ua_level));
idx = esid2idx(esid, ua->ua_level);
/*
* This code is specific to ppc64 where a load is
* atomic, so no need for atomic_load macro.
*/
if (ua->ua_level == UAD_LEAF_LEVEL)
return ((ua->u.slb_entries[idx].slbe & SLBE_VALID) ?
&ua->u.slb_entries[idx] : NULL);
ua = ua->u.ua_child[idx];
if (ua == NULL ||
esid2base(esid, ua->ua_level) != ua->ua_base)
return (NULL);
}
PMAP_LOCK_ASSERT(pm, MA_OWNED);
cont.slb.slbe = slb->slbe;
found = SPLAY_FIND(slb_tree, &pm->pm_slbtree, &cont);
if (found == NULL)
return (-1);
slb->slbv = found->slb.slbv;
return (0);
return (NULL);
}
uint64_t
va_to_vsid(pmap_t pm, vm_offset_t va)
{
struct slb entry;
struct slb *entry;
/* Shortcut kernel case */
if (pm == kernel_pmap)
@@ -114,56 +260,149 @@ va_to_vsid(pmap_t pm, vm_offset_t va)
* to the PMAP's segment table.
*/
if (va_to_slb_entry(pm, va, &entry) != 0)
entry = user_va_to_slb_entry(pm, va);
if (entry == NULL)
return (allocate_vsid(pm, (uintptr_t)va >> ADDR_SR_SHFT, 0));
return ((entry.slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
}
uint64_t
allocate_vsid(pmap_t pm, uint64_t esid, int large)
{
uint64_t vsid;
struct slbcontainer *slb_entry, kern_entry;
struct slb *prespill;
uint64_t vsid, slbv;
struct slbtnode *ua, *next, *inter;
struct slb *slb;
int idx;
prespill = NULL;
KASSERT(pm != kernel_pmap, ("Attempting to allocate a kernel VSID"));
if (pm == kernel_pmap) {
vsid = va_to_vsid(pm, esid << ADDR_SR_SHFT);
slb_entry = &kern_entry;
prespill = PCPU_GET(slb);
} else {
vsid = moea64_get_unique_vsid();
slb_entry = uma_zalloc(slb_zone, M_NOWAIT);
if (slb_entry == NULL)
panic("Could not allocate SLB mapping!");
prespill = pm->pm_slb;
}
slb_entry->slb.slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
slb_entry->slb.slbv = vsid << SLBV_VSID_SHIFT;
PMAP_LOCK_ASSERT(pm, MA_OWNED);
vsid = moea64_get_unique_vsid();
slbv = vsid << SLBV_VSID_SHIFT;
if (large)
slb_entry->slb.slbv |= SLBV_L;
slbv |= SLBV_L;
if (pm != kernel_pmap) {
PMAP_LOCK_ASSERT(pm, MA_OWNED);
SPLAY_INSERT(slb_tree, &pm->pm_slbtree, slb_entry);
ua = pm->pm_slb_tree_root;
/* Descend to the correct leaf or NULL pointer. */
for (;;) {
KASSERT(uad_baseok(ua),
("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
idx = esid2idx(esid, ua->ua_level);
if (ua->ua_level == UAD_LEAF_LEVEL) {
ua->u.slb_entries[idx].slbv = slbv;
eieio();
ua->u.slb_entries[idx].slbe = (esid << SLBE_ESID_SHIFT)
| SLBE_VALID;
setbit(&ua->ua_alloc, idx);
slb = &ua->u.slb_entries[idx];
break;
}
next = ua->u.ua_child[idx];
if (next == NULL) {
slb = make_new_leaf(esid, slbv, ua);
break;
}
/*
* Check if the next item down has an okay ua_base.
* If not, we need to allocate an intermediate node.
*/
if (esid2base(esid, next->ua_level) != next->ua_base) {
inter = make_intermediate(esid, ua);
slb = make_new_leaf(esid, slbv, inter);
break;
}
ua = next;
}
/*
* Someone probably wants this soon, and it may be a wired
* SLB mapping, so pre-spill this entry.
*/
if (prespill != NULL)
slb_insert(pm, prespill, &slb_entry->slb);
eieio();
slb_insert(pm, pm->pm_slb, slb);
return (vsid);
}
void
free_vsid(pmap_t pm, uint64_t esid, int large)
{
struct slbtnode *ua;
int idx;
PMAP_LOCK_ASSERT(pm, MA_OWNED);
ua = pm->pm_slb_tree_root;
/* Descend to the correct leaf. */
for (;;) {
KASSERT(uad_baseok(ua),
("uad base %09jx level %d bad!", ua->ua_base, ua->ua_level));
idx = esid2idx(esid, ua->ua_level);
if (ua->ua_level == UAD_LEAF_LEVEL) {
ua->u.slb_entries[idx].slbv = 0;
eieio();
ua->u.slb_entries[idx].slbe = 0;
clrbit(&ua->ua_alloc, idx);
return;
}
ua = ua->u.ua_child[idx];
if (ua == NULL ||
esid2base(esid, ua->ua_level) != ua->ua_base) {
/* Perhaps just return instead of assert? */
KASSERT(0,
("Asked to remove an entry that was never inserted!"));
return;
}
}
}
static void
free_slb_tree_node(struct slbtnode *ua)
{
int idx;
for (idx = 0; idx < 16; idx++) {
if (ua->ua_level != UAD_LEAF_LEVEL) {
if (ua->u.ua_child[idx] != NULL)
free_slb_tree_node(ua->u.ua_child[idx]);
} else {
if (ua->u.slb_entries[idx].slbv != 0)
moea64_release_vsid(ua->u.slb_entries[idx].slbv
>> SLBV_VSID_SHIFT);
}
}
uma_zfree(slbt_zone, ua);
}
void
slb_free_tree(pmap_t pm)
{
free_slb_tree_node(pm->pm_slb_tree_root);
}
struct slbtnode *
slb_alloc_tree(void)
{
struct slbtnode *root;
root = uma_zalloc(slbt_zone, M_NOWAIT | M_ZERO);
root->ua_level = UAD_ROOT_LEVEL;
return (root);
}
/* Lock entries mapping kernel text and stacks */
#define SLB_SPILLABLE(slbe) \
@@ -222,62 +461,12 @@ slb_insert(pmap_t pm, struct slb *slbcache, struct slb *slb_entry)
critical_exit();
}
int
vsid_to_esid(pmap_t pm, uint64_t vsid, uint64_t *esid)
{
uint64_t slbv;
struct slbcontainer *entry;
#ifdef INVARIANTS
if (pm == kernel_pmap)
panic("vsid_to_esid only works on user pmaps");
PMAP_LOCK_ASSERT(pm, MA_OWNED);
#endif
slbv = vsid << SLBV_VSID_SHIFT;
SPLAY_FOREACH(entry, slb_tree, &pm->pm_slbtree) {
if (slbv == entry->slb.slbv) {
*esid = entry->slb.slbe >> SLBE_ESID_SHIFT;
return (0);
}
}
return (-1);
}
void
free_vsids(pmap_t pm)
{
struct slbcontainer *entry;
while (!SPLAY_EMPTY(&pm->pm_slbtree)) {
entry = SPLAY_MIN(slb_tree, &pm->pm_slbtree);
SPLAY_REMOVE(slb_tree, &pm->pm_slbtree, entry);
moea64_release_vsid(entry->slb.slbv >> SLBV_VSID_SHIFT);
uma_zfree(slb_zone, entry);
}
}
static int
slb_compare(struct slbcontainer *a, struct slbcontainer *b)
{
if (a->slb.slbe == b->slb.slbe)
return (0);
else if (a->slb.slbe < b->slb.slbe)
return (-1);
else
return (1);
}
static void
slb_zone_init(void *dummy)
{
slb_zone = uma_zcreate("SLB segment", sizeof(struct slbcontainer),
slbt_zone = uma_zcreate("SLB tree node", sizeof(struct slbtnode),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
slb_cache_zone = uma_zcreate("SLB cache", 64*sizeof(struct slb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);


@@ -445,33 +445,37 @@ syscall(struct trapframe *frame)
static int
handle_slb_spill(pmap_t pm, vm_offset_t addr)
{
struct slb slb_entry;
int error, i;
struct slb kern_entry, *user_entry;
uint64_t esid;
int i;
esid = (uintptr_t)addr >> ADDR_SR_SHFT;
if (pm == kernel_pmap) {
error = va_to_slb_entry(pm, addr, &slb_entry);
if (error)
return (error);
kern_entry.slbv = kernel_va_to_slbv(addr);
kern_entry.slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
slb_insert(pm, PCPU_GET(slb), &slb_entry);
slb_insert(pm, PCPU_GET(slb), &kern_entry);
return (0);
}
PMAP_LOCK(pm);
error = va_to_slb_entry(pm, addr, &slb_entry);
if (error != 0)
(void)allocate_vsid(pm, (uintptr_t)addr >> ADDR_SR_SHFT, 0);
else {
user_entry = user_va_to_slb_entry(pm, addr);
if (user_entry == NULL) {
/* allocate_vsid auto-spills it */
(void)allocate_vsid(pm, esid, 0);
} else {
/*
* Check that another CPU has not already mapped this.
* XXX: Per-thread SLB caches would be better.
*/
for (i = 0; i < 64; i++)
if (pm->pm_slb[i].slbe == (slb_entry.slbe | i))
if (pm->pm_slb[i].slbe == (user_entry->slbe | i))
break;
if (i == 64)
slb_insert(pm, pm->pm_slb, &slb_entry);
slb_insert(pm, pm->pm_slb, user_entry);
}
PMAP_UNLOCK(pm);
@@ -513,19 +517,7 @@ trap_pfault(struct trapframe *frame, int user)
map = &p->p_vmspace->vm_map;
#ifdef __powerpc64__
user_sr = 0;
__asm ("slbmfev %0, %1"
: "=r"(user_sr)
: "r"(USER_SR));
PMAP_LOCK(&p->p_vmspace->vm_pmap);
user_sr >>= SLBV_VSID_SHIFT;
rv = vsid_to_esid(&p->p_vmspace->vm_pmap, user_sr,
&user_sr);
PMAP_UNLOCK(&p->p_vmspace->vm_pmap);
if (rv != 0)
return (SIGSEGV);
user_sr = td->td_pcb->pcb_cpu.aim.usr_segm;
#else
__asm ("mfsr %0, %1"
: "=r"(user_sr)


@@ -66,6 +66,7 @@ struct pcb {
union {
struct {
vm_offset_t usr_segm; /* Base address */
register_t usr_esid; /* USER_SR segment */
register_t usr_vsid; /* USER_SR segment */
} aim;


@@ -86,15 +86,13 @@ struct pmap_md {
#define NPMAPS 32768
#endif /* !defined(NPMAPS) */
struct slbcontainer;
SPLAY_HEAD(slb_tree, slbcontainer);
struct slbtnode;
struct pmap {
struct mtx pm_mtx;
#ifdef __powerpc64__
struct slb_tree pm_slbtree;
struct slbtnode *pm_slb_tree_root;
struct slb *pm_slb;
#else
register_t pm_sr[16];
@@ -139,12 +137,17 @@ struct md_page {
* NB: The PMAP MUST be locked already.
*/
uint64_t va_to_vsid(pmap_t pm, vm_offset_t va);
int va_to_slb_entry(pmap_t pm, vm_offset_t va, struct slb *);
/* Lock-free, non-allocating lookup routines */
uint64_t kernel_va_to_slbv(vm_offset_t va);
struct slb *user_va_to_slb_entry(pmap_t pm, vm_offset_t va);
uint64_t allocate_vsid(pmap_t pm, uint64_t esid, int large);
void slb_insert(pmap_t pm, struct slb *dst, struct slb *);
int vsid_to_esid(pmap_t pm, uint64_t vsid, uint64_t *esid);
void free_vsids(pmap_t pm);
void free_vsid(pmap_t pm, uint64_t esid, int large);
void slb_insert(pmap_t pm, struct slb *dst, struct slb *);
struct slbtnode *slb_alloc_tree(void);
void slb_free_tree(pmap_t pm);
struct slb *slb_alloc_user_cache(void);
void slb_free_user_cache(struct slb *);
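
Taken together, the moea64_pinit/set_user_sr/moea64_release hunks above imply roughly the following per-pmap lifecycle. This is a sketch condensed from those hunks, not code from the commit; it assumes the kernel environment and the declarations in this header, and omits error handling.

static uint64_t
example_lifecycle(pmap_t pm, vm_offset_t va)
{
	struct slb *slb;
	uint64_t vsid;

	pm->pm_slb_tree_root = slb_alloc_tree();	/* cf. moea64_pinit()  */
	pm->pm_slb = slb_alloc_user_cache();

	slb = user_va_to_slb_entry(pm, va);		/* lockless fast path  */
	if (slb == NULL) {
		PMAP_LOCK(pm);
		vsid = va_to_vsid(pm, va);		/* may allocate a VSID */
		PMAP_UNLOCK(pm);
	} else
		vsid = slb->slbv >> SLBV_VSID_SHIFT;

	slb_free_tree(pm);				/* cf. moea64_release() */
	slb_free_user_cache(pm->pm_slb);
	return (vsid);
}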