Remove the panic from vm_radix_insert() and propagate the error to the
callers of vm_page_insert().

The default action for every caller is to unwind the operation; the
exception is vm_page_rename(), where this has proven impossible to do.
In that case the function simply spins until a page becomes available
for allocation.  However, because a vm_page_rename() failure is rare
(this panic has never been hit in the past), it is thought to be a very
seldom event and not a potential performance factor.

The patch has been tested with an atomic counter that makes the zone
allocator return NULL once every 100000 allocations.  Via printf I
verified that a typical buildkernel can trigger this path about 30
times.  The patch survived 2 hours of repeated buildkernel/buildworld
runs.

Several technical notes:
- In several callers, vm_page_insert() is moved closer to the failure
  points.  This could be committed separately, before vmcontention hits
  the tree, just to verify that -CURRENT is happy with it.
- vm_page_rename() no longer requires the page lock in its callers; it
  hides the locking as an implementation detail and performs it
  internally.
- vm_page_insert() now returns an int, with 0 meaning everything was
  ok; the KPI is therefore broken by this patch.
commit 6587a6afdd (parent def86adf8c)
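The caller-side pattern that recurs throughout the hunks below can be
condensed as follows; this sketch is lifted from the vm_page_alloc()
hunk, with the surrounding allocation logic and the vnode bookkeeping
elided:

	if (vm_page_insert(m, object, pindex) != 0) {
		/* Unwind: drop the references taken for this page. */
		vm_page_lock(m);
		if (req & VM_ALLOC_WIRED)
			vm_page_unwire(m, 0);
		vm_page_free(m);
		vm_page_unlock(m);
		pagedaemon_wakeup();
		return (NULL);
	}

vm_page_rename() is the one exception: since its callers cannot unwind,
it retries the insertion in a loop (see the vm_page.c hunk), waking the
pagedaemon between attempts.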
@@ -85,6 +85,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
 	vm_map_entry_t entry;
 	vm_pindex_t upindex;
 	vm_prot_t prot;
+	vm_page_bits_t vbits;
 	boolean_t wired;
 
 	KASSERT((uaddr & PAGE_MASK) == 0,
@@ -95,6 +96,7 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
 	 * unwired in sf_buf_mext().
 	 */
 	kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
+	vbits = kern_pg->valid;
 	kern_pg->valid = VM_PAGE_BITS_ALL;
 	KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
 	    ("vm_pgmoveco: kern_pg is not correctly wired"));
@@ -105,6 +107,13 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
 		return(EFAULT);
 	}
 	VM_OBJECT_LOCK(uobject);
+	if (vm_page_insert(kern_pg, uobject, upindex) != 0) {
+		kern_pg->valid = vbits;
+		VM_OBJECT_UNLOCK(uobject);
+		vm_map_lookup_done(map, entry);
+		return(ENOMEM);
+	}
+	vm_page_dirty(kern_pg);
 retry:
 	if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
 		if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
@@ -122,8 +131,6 @@ vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
 		if (uobject->backing_object != NULL)
 			pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
 	}
-	vm_page_insert(kern_pg, uobject, upindex);
-	vm_page_dirty(kern_pg);
 	VM_OBJECT_UNLOCK(uobject);
 	vm_map_lookup_done(map, entry);
 	return(KERN_SUCCESS);
@@ -307,11 +307,14 @@ old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
 	 */
 	page = vm_page_getfake(paddr, memattr);
 	VM_OBJECT_LOCK(object);
+	if (vm_page_insert(page, object, offset) != 0) {
+		vm_page_putfake(page);
+		return (VM_PAGER_FAIL);
+	}
 	vm_page_lock(*mres);
 	vm_page_free(*mres);
 	vm_page_unlock(*mres);
 	*mres = page;
-	vm_page_insert(page, object, pidx);
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);
@@ -179,6 +179,10 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
 	/* Construct a new fake page. */
 	page = vm_page_getfake(paddr, memattr);
 	VM_OBJECT_LOCK(object);
+	if (vm_page_insert(page, object, offset) != 0) {
+		vm_page_putfake(page);
+		return (VM_PAGER_FAIL);
+	}
 	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq);
 
 	/* Free the original pages and insert this fake page into the object. */
@@ -187,7 +191,6 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
 		vm_page_free(m[i]);
 		vm_page_unlock(m[i]);
 	}
-	vm_page_insert(page, object, offset);
 	m[reqpage] = page;
 	page->valid = VM_PAGE_BITS_ALL;
 
@@ -769,9 +769,7 @@ RetryFault:;
 				 * process'es object.  The page is
 				 * automatically made dirty.
 				 */
-				vm_page_lock(fs.m);
 				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
-				vm_page_unlock(fs.m);
 				vm_page_busy(fs.m);
 				fs.first_m = fs.m;
 				fs.m = NULL;
@@ -1397,9 +1397,7 @@ vm_object_split(vm_map_entry_t entry)
 			vm_reserv_rename(m, new_object, orig_object,
 			    offidxstart);
 #endif
-		vm_page_lock(m);
 		vm_page_rename(m, new_object, idx);
-		vm_page_unlock(m);
 		/*
 		 * page automatically made dirty by rename and
 		 * cache handled
@@ -1654,9 +1652,7 @@ vm_object_backing_scan(vm_object_t object, int op)
 		 * If the page was mapped to a process, it can remain
 		 * mapped through the rename.
 		 */
-		vm_page_lock(p);
 		vm_page_rename(p, object, new_pindex);
-		vm_page_unlock(p);
 		/* page automatically made dirty by rename */
 	}
 }
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
+#include <machine/cpu.h>
 #include <machine/md_var.h>
 
 /*
@@ -777,13 +778,15 @@ vm_page_dirty(vm_page_t m)
  *	The object and page must be locked.
  *	This routine may not block.
  */
-void
+int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t neighbor;
+	vm_pindex_t cpindex;
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	if (m->object != NULL)
 		panic("vm_page_insert: page already inserted");
+	cpindex = m->pindex;
 
 	/*
 	 * Record the object/offset pair in this page
@@ -804,8 +807,13 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 		} else
 			TAILQ_INSERT_TAIL(&object->memq, m, listq);
 	}
-	if (vm_radix_insert(&object->rtree, pindex, m) != 0)
-		panic("vm_page_insert: unable to insert the new page");
+
+	if (vm_radix_insert(&object->rtree, pindex, m) != 0) {
+		TAILQ_REMOVE(&object->memq, m, listq);
+		m->object = NULL;
+		m->pindex = cpindex;
+		return (ENOMEM);
+	}
 
 	/*
 	 * show that the object has one more resident page.
@@ -823,6 +831,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 	 */
 	if (m->aflags & PGA_WRITEABLE)
 		vm_object_set_writeable_dirty(object);
+	return (0);
 }
 
 /*
@@ -967,9 +976,20 @@ vm_page_prev(vm_page_t m)
 void
 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 {
+	u_int i;
 
+	MPASS(m->object != NULL);
+	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+	VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+
+	vm_page_lock(m);
 	vm_page_remove(m);
-	vm_page_insert(m, new_object, new_pindex);
+	vm_page_unlock(m);
+	while (vm_page_insert(m, new_object, new_pindex) != 0) {
+		pagedaemon_wakeup();
+		for (i = 0; i < 10000000; i++)
+			cpu_spinwait();
+	}
 	vm_page_dirty(m);
 }
 
@@ -1250,7 +1270,19 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 		if (object->memattr != VM_MEMATTR_DEFAULT &&
 		    object->type != OBJT_DEVICE && object->type != OBJT_SG)
 			pmap_page_set_memattr(m, object->memattr);
-		vm_page_insert(m, object, pindex);
+		if (vm_page_insert(m, object, pindex) != 0) {
+
+			/* See the comment below about hold count handling. */
+			if (vp != NULL)
+				vdrop(vp);
+			vm_page_lock(m);
+			if (req & VM_ALLOC_WIRED)
+				vm_page_unwire(m, 0);
+			vm_page_free(m);
+			vm_page_unlock(m);
+			pagedaemon_wakeup();
+			return (NULL);
+		}
 	} else
 		m->pindex = pindex;
 
@@ -1317,6 +1349,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 {
 	struct vnode *drop;
 	vm_page_t deferred_vdrop_list, m, m_ret;
+	vm_pindex_t cpindex;
 	u_int flags, oflags;
 	int req_class;
 
@@ -1403,6 +1436,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 		    memattr == VM_MEMATTR_DEFAULT)
 			memattr = object->memattr;
 	}
+	cpindex = pindex;
 	for (m = m_ret; m < &m_ret[npages]; m++) {
 		m->aflags = 0;
 		m->flags &= flags;
@@ -1412,12 +1446,30 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
 		m->oflags = oflags;
 		if (memattr != VM_MEMATTR_DEFAULT)
 			pmap_page_set_memattr(m, memattr);
-		if (object != NULL)
-			vm_page_insert(m, object, pindex);
-		else
-			m->pindex = pindex;
 		pindex++;
 	}
+	for (m = m_ret; m < &m_ret[npages]; m++) {
+		if (object != NULL) {
+			if (vm_page_insert(m, object, cpindex) != 0) {
+				while (deferred_vdrop_list != NULL) {
+					vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
+					deferred_vdrop_list =
+					    deferred_vdrop_list->pageq.tqe_next;
+				}
+				for (m = m_ret; m < &m_ret[npages]; m++) {
+					vm_page_lock(m);
+					if (req & VM_ALLOC_WIRED)
+						vm_page_unwire(m, 0);
+					vm_page_free(m);
+					vm_page_unlock(m);
+				}
+				pagedaemon_wakeup();
+				return (NULL);
+			}
+		} else
+			m->pindex = cpindex;
+		cpindex++;
+	}
 	while (deferred_vdrop_list != NULL) {
 		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
 		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
@@ -2642,11 +2694,8 @@ vm_page_cowfault(vm_page_t m)
 	pindex = m->pindex;
 
 retry_alloc:
-	pmap_remove_all(m);
-	vm_page_remove(m);
 	mnew = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
 	if (mnew == NULL) {
-		vm_page_insert(m, object, pindex);
 		vm_page_unlock(m);
 		VM_OBJECT_UNLOCK(object);
 		VM_WAIT;
@@ -2672,8 +2721,9 @@ vm_page_cowfault(vm_page_t m)
 		vm_page_lock(mnew);
 		vm_page_free(mnew);
 		vm_page_unlock(mnew);
-		vm_page_insert(m, object, pindex);
 	} else { /* clear COW & copy page */
+		pmap_remove_all(m);
+		vm_page_remove(m);
 		if (!so_zerocp_fullpage)
 			pmap_copy_page(m, mnew);
 		mnew->valid = VM_PAGE_BITS_ALL;
@@ -386,7 +386,7 @@ void vm_page_dontneed(vm_page_t);
 void vm_page_deactivate (vm_page_t);
 vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
-void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
+int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
 vm_page_t vm_page_next(vm_page_t m);
 int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
@@ -55,6 +55,7 @@
 #include <sys/kdb.h>
 
 CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
+CTASSERT((sizeof(u_int) * NBBY) >= VM_RADIX_LIMIT);
 
 static uma_zone_t vm_radix_node_zone;
 
@@ -211,6 +212,24 @@ vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
 	rtree->rt_root = root;
 }
 
+static inline void
+vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
+    struct vm_radix_node *iroot, int ilevel)
+{
+	struct vm_radix_node *rnode;
+
+	CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
+	    rtree, root, iroot, ilevel);
+	while (iroot != root && root != NULL) {
+		rnode = root;
+		MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
+		rnode->rn_count = 0;
+		root = rnode->rn_child[0];
+		vm_radix_node_put(rnode);
+	}
+	vm_radix_setroot(rtree, iroot, ilevel);
+}
+
 static inline void *
 vm_radix_match(void *child, int color)
 {
@@ -262,10 +281,9 @@ vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
 int
 vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 {
-	struct vm_radix_node *rnode;
-	struct vm_radix_node *root;
-	int level;
-	int slot;
+	struct vm_radix_node *iroot, *rnode, *root;
+	u_int allocmsk;
+	int clev, ilevel, level, slot;
 
 	CTR3(KTR_VM,
 	    "insert: tree %p, index %ju, val %p", rtree, (uintmax_t)index, val);
@@ -276,6 +294,8 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 	 * Increase the height by adding nodes at the root until
 	 * there is sufficient space.
 	 */
+	ilevel = level;
+	iroot = root;
 	while (level == 0 || index > VM_RADIX_MAX(level)) {
 		CTR3(KTR_VM, "insert: expanding %ju > %ju height %d",
 		    (uintmax_t)index, (uintmax_t)VM_RADIX_MAX(level), level);
@@ -292,6 +312,8 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 			CTR4(KTR_VM,
 		    "insert: tree %p, root %p, index: %ju, level: %d ENOMEM",
 			    rtree, root, (uintmax_t)index, level);
+			vm_radix_unwind_heightup(rtree, root, iroot,
+			    ilevel);
 			return (ENOMEM);
 		}
 		/*
@@ -309,6 +331,8 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 	}
 
 	/* Now that the tree is tall enough, fill in the path to the index. */
+	allocmsk = 0;
+	clev = level;
 	rnode = root;
 	for (level = level - 1; level > 0; level--) {
 		slot = vm_radix_slot(index, level);
@@ -324,9 +348,35 @@ vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
 		    "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
 			    rtree, rnode, rnode->rn_child[slot],
 			    rnode->rn_count);
+			MPASS(level != clev || allocmsk == 0);
+			while (allocmsk != 0) {
+				rnode = root;
+				level = clev;
+				level--;
+				CTR4(KTR_VM,
+		    "insert: unwind root %p, level %d, slot %d, allocmsk: 0x%x",
+				    root, level, slot, allocmsk);
+				slot = vm_radix_slot(index, level);
+				MPASS(level >= (ffs(allocmsk) - 1));
+				while (level > (ffs(allocmsk) - 1)) {
+					MPASS(level > 0);
+					slot = vm_radix_slot(index,
+					    level);
+					rnode = rnode->rn_child[slot];
+					level--;
+				}
+				MPASS((allocmsk & (1 << level)) != 0);
+				allocmsk &= ~(1 << level);
+				rnode->rn_count--;
+				vm_radix_node_put(rnode->rn_child[slot]);
+				rnode->rn_child[slot] = NULL;
+			}
+			vm_radix_unwind_heightup(rtree, root, iroot,
+			    ilevel);
 			return (ENOMEM);
 		}
 		rnode->rn_count++;
+		allocmsk |= (1 << level);
 	}
 	CTR5(KTR_VM,
 	    "insert: tree %p, index %ju, level %d, slot %d, rnode %p",