Replace the vm_page hash table with a per-vmobject splay tree. There should
be no major change in performance from this change at this time, but it will allow other work to progress: Giant lock removal around the VM system in favor of per-object mutexes, ranged fsyncs, more optimal COMMIT RPCs for NFS, partial filesystem syncs by the syncer, more optimal object flushing, etc. Note that the buffer cache is already using a similar splay tree mechanism. Note that a good chunk of the old hash table code is still in the tree. Alan or I will remove it prior to the release if the new code does not introduce unsolvable bugs; otherwise, leaving it in place lets us revert more easily. Submitted by: alc (this is Alan's code) Approved by: re
This commit is contained in:
parent
bf5cd60a89
commit
277583f7f8
@ -195,6 +195,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
|
||||
TAILQ_INIT(&object->memq);
|
||||
TAILQ_INIT(&object->shadow_head);
|
||||
|
||||
object->root = NULL;
|
||||
object->type = type;
|
||||
object->size = size;
|
||||
object->ref_count = 1;
|
||||
|
@ -92,6 +92,7 @@ struct vm_object {
|
||||
TAILQ_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */
|
||||
TAILQ_ENTRY(vm_object) shadow_list; /* chain of shadow objects */
|
||||
TAILQ_HEAD(, vm_page) memq; /* list of resident pages */
|
||||
vm_page_t root; /* root of the resident page splay tree */
|
||||
int generation; /* generation ID */
|
||||
vm_pindex_t size; /* Object size */
|
||||
int ref_count; /* How many refs?? */
|
||||
|
150
sys/vm/vm_page.c
150
sys/vm/vm_page.c
@ -319,25 +319,6 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
|
||||
return (vaddr);
|
||||
}
|
||||
|
||||
/*
|
||||
* vm_page_hash:
|
||||
*
|
||||
* Distributes the object/offset key pair among hash buckets.
|
||||
*
|
||||
* NOTE: This macro depends on vm_page_bucket_count being a power of 2.
|
||||
* This routine may not block.
|
||||
*
|
||||
* We try to randomize the hash based on the object to spread the pages
|
||||
* out in the hash table without it costing us too much.
|
||||
*/
|
||||
static __inline int
|
||||
vm_page_hash(vm_object_t object, vm_pindex_t pindex)
|
||||
{
|
||||
int i = ((uintptr_t)object + pindex) ^ object->hash_rand;
|
||||
|
||||
return (i & vm_page_hash_mask);
|
||||
}
|
||||
|
||||
void
|
||||
vm_page_flag_set(vm_page_t m, unsigned short bits)
|
||||
{
|
||||
@ -575,6 +556,63 @@ vm_page_undirty(vm_page_t m)
|
||||
m->dirty = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* vm_page_splay:
|
||||
*
|
||||
* Implements Sleator and Tarjan's top-down splay algorithm. Returns
|
||||
* the vm_page containing the given pindex. If, however, that
|
||||
* pindex is not found in the vm_object, returns a vm_page that is
|
||||
* adjacent to the pindex, coming before or after it.
|
||||
*/
|
||||
static vm_page_t
|
||||
vm_page_splay(vm_pindex_t pindex, vm_page_t root)
|
||||
{
|
||||
struct vm_page dummy;
|
||||
vm_page_t lefttreemax, righttreemin, y;
|
||||
|
||||
if (root == NULL)
|
||||
return (root);
|
||||
lefttreemax = righttreemin = &dummy;
|
||||
for (;; root = y) {
|
||||
if (pindex < root->pindex) {
|
||||
if ((y = root->left) == NULL)
|
||||
break;
|
||||
if (pindex < y->pindex) {
|
||||
/* Rotate right. */
|
||||
root->left = y->right;
|
||||
y->right = root;
|
||||
root = y;
|
||||
if ((y = root->left) == NULL)
|
||||
break;
|
||||
}
|
||||
/* Link into the new root's right tree. */
|
||||
righttreemin->left = root;
|
||||
righttreemin = root;
|
||||
} else if (pindex > root->pindex) {
|
||||
if ((y = root->right) == NULL)
|
||||
break;
|
||||
if (pindex > y->pindex) {
|
||||
/* Rotate left. */
|
||||
root->right = y->left;
|
||||
y->left = root;
|
||||
root = y;
|
||||
if ((y = root->right) == NULL)
|
||||
break;
|
||||
}
|
||||
/* Link into the new root's left tree. */
|
||||
lefttreemax->right = root;
|
||||
lefttreemax = root;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
/* Assemble the new root. */
|
||||
lefttreemax->right = root->left;
|
||||
righttreemin->left = root->right;
|
||||
root->left = dummy.right;
|
||||
root->right = dummy.left;
|
||||
return (root);
|
||||
}
|
||||
|
||||
/*
|
||||
* vm_page_insert: [ internal use only ]
|
||||
*
|
||||
@ -591,7 +629,7 @@ vm_page_undirty(vm_page_t m)
|
||||
void
|
||||
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
|
||||
{
|
||||
struct vm_page **bucket;
|
||||
vm_page_t root;
|
||||
|
||||
GIANT_REQUIRED;
|
||||
|
||||
@ -605,18 +643,25 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
|
||||
m->pindex = pindex;
|
||||
|
||||
/*
|
||||
* Insert it into the object_object/offset hash table
|
||||
* Now link into the object's ordered list of backed pages.
|
||||
*/
|
||||
bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
|
||||
mtx_lock_spin(&vm_page_buckets_mtx);
|
||||
m->hnext = *bucket;
|
||||
*bucket = m;
|
||||
mtx_unlock_spin(&vm_page_buckets_mtx);
|
||||
|
||||
/*
|
||||
* Now link into the object's list of backed pages.
|
||||
*/
|
||||
TAILQ_INSERT_TAIL(&object->memq, m, listq);
|
||||
root = vm_page_splay(pindex, object->root);
|
||||
if (root == NULL) {
|
||||
m->left = NULL;
|
||||
m->right = NULL;
|
||||
TAILQ_INSERT_TAIL(&object->memq, m, listq);
|
||||
} else if (pindex < root->pindex) {
|
||||
m->left = root->left;
|
||||
m->right = root;
|
||||
root->left = NULL;
|
||||
TAILQ_INSERT_BEFORE(root, m, listq);
|
||||
} else {
|
||||
m->right = root->right;
|
||||
m->left = root;
|
||||
root->right = NULL;
|
||||
TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
|
||||
}
|
||||
object->root = m;
|
||||
object->generation++;
|
||||
|
||||
/*
|
||||
@ -648,7 +693,7 @@ void
|
||||
vm_page_remove(vm_page_t m)
|
||||
{
|
||||
vm_object_t object;
|
||||
vm_page_t *bucket;
|
||||
vm_page_t root;
|
||||
|
||||
GIANT_REQUIRED;
|
||||
|
||||
@ -666,24 +711,18 @@ vm_page_remove(vm_page_t m)
|
||||
|
||||
object = m->object;
|
||||
|
||||
/*
|
||||
* Remove from the object_object/offset hash table. The object
|
||||
* must be on the hash queue, we will panic if it isn't
|
||||
*/
|
||||
bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
|
||||
mtx_lock_spin(&vm_page_buckets_mtx);
|
||||
while (*bucket != m) {
|
||||
if (*bucket == NULL)
|
||||
panic("vm_page_remove(): page not found in hash");
|
||||
bucket = &(*bucket)->hnext;
|
||||
}
|
||||
*bucket = m->hnext;
|
||||
m->hnext = NULL;
|
||||
mtx_unlock_spin(&vm_page_buckets_mtx);
|
||||
|
||||
/*
|
||||
* Now remove from the object's list of backed pages.
|
||||
*/
|
||||
if (m != object->root)
|
||||
vm_page_splay(m->pindex, object->root);
|
||||
if (m->left == NULL)
|
||||
root = m->right;
|
||||
else {
|
||||
root = vm_page_splay(m->pindex, m->left);
|
||||
root->right = m->right;
|
||||
}
|
||||
object->root = root;
|
||||
TAILQ_REMOVE(&object->memq, m, listq);
|
||||
|
||||
/*
|
||||
@ -701,7 +740,7 @@ vm_page_remove(vm_page_t m)
|
||||
* Returns the page associated with the object/offset
|
||||
* pair specified; if none is found, NULL is returned.
|
||||
*
|
||||
* The object must be locked. No side effects.
|
||||
* The object must be locked.
|
||||
* This routine may not block.
|
||||
* This is a critical path routine
|
||||
*/
|
||||
@ -709,17 +748,12 @@ vm_page_t
|
||||
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
|
||||
{
|
||||
vm_page_t m;
|
||||
struct vm_page **bucket;
|
||||
|
||||
/*
|
||||
* Search the hash table for this object/offset pair
|
||||
*/
|
||||
bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
|
||||
mtx_lock_spin(&vm_page_buckets_mtx);
|
||||
for (m = *bucket; m != NULL; m = m->hnext)
|
||||
if (m->object == object && m->pindex == pindex)
|
||||
break;
|
||||
mtx_unlock_spin(&vm_page_buckets_mtx);
|
||||
GIANT_REQUIRED;
|
||||
|
||||
m = vm_page_splay(pindex, object->root);
|
||||
if ((object->root = m) != NULL && m->pindex != pindex)
|
||||
m = NULL;
|
||||
return (m);
|
||||
}
|
||||
|
||||
|
@ -110,8 +110,9 @@ TAILQ_HEAD(pglist, vm_page);
|
||||
|
||||
struct vm_page {
|
||||
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */
|
||||
struct vm_page *hnext; /* hash table link (O,P) */
|
||||
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
|
||||
struct vm_page *left; /* splay tree link (O) */
|
||||
struct vm_page *right; /* splay tree link (O) */
|
||||
|
||||
vm_object_t object; /* which object am I in (O,P)*/
|
||||
vm_pindex_t pindex; /* offset into object (O,P) */
|
||||
|
Loading…
Reference in New Issue
Block a user