Replace the vm_page hash table with a per-vmobject splay tree.  There should
be no major change in performance at this time, but this will allow other
work to progress: removal of the Giant lock around the VM system in favor of
per-object mutexes, ranged fsyncs, more optimal COMMIT RPCs for NFS, partial
filesystem syncs by the syncer, more optimal object flushing, etc.  Note
that the buffer cache already uses a similar splay tree mechanism.
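
A sketch of the shape of the change (the names below are illustrative
stand-ins, not the committed code): each object now owns the root of a
binary search tree of its own resident pages, keyed on pindex, so a lookup
touches only that object's pages under that object's lock, instead of
chasing a chain in one system-wide hash table.  The committed code
additionally splays on every access; see vm_page_splay in the diff below.

#include <stddef.h>

/*
 * Minimal sketch only: "struct page" and "struct object" stand in for
 * vm_page and vm_object, and a plain BST search stands in for the
 * splaying search the real code performs.
 */
struct page {
	unsigned long pindex;		/* offset into the owning object */
	struct page *left, *right;	/* per-object tree links */
};

struct object {
	struct page *root;		/* replaces the global hash table */
};

static struct page *
page_lookup(struct object *obj, unsigned long pindex)
{
	struct page *m;

	m = obj->root;
	while (m != NULL && m->pindex != pindex)
		m = (pindex < m->pindex) ? m->left : m->right;
	return (m);
}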

Note that a good chunk of the old hash table code is still in the tree.
Alan or I will remove it prior to the release, provided the new code does
not introduce unsolvable bugs; until then, leaving it in place makes a
revert easier.

Submitted by:	alc	(this is Alan's code)
Approved by:	re
Author:	Matthew Dillon
Date:	2002-10-18 17:24:30 +00:00
Commit:	b86ec922be
Parent:	08076c086f

4 changed files with 96 additions and 59 deletions

sys/vm/vm_object.c

@@ -195,6 +195,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 	TAILQ_INIT(&object->memq);
 	TAILQ_INIT(&object->shadow_head);
 
+	object->root = NULL;
 	object->type = type;
 	object->size = size;
 	object->ref_count = 1;

sys/vm/vm_object.h

@@ -92,6 +92,7 @@ struct vm_object {
 	TAILQ_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */
 	TAILQ_ENTRY(vm_object) shadow_list; /* chain of shadow objects */
 	TAILQ_HEAD(, vm_page) memq;	/* list of resident pages */
+	vm_page_t root;			/* root of the resident page splay tree */
 	int generation;			/* generation ID */
 	vm_pindex_t size;		/* Object size */
 	int ref_count;			/* How many refs?? */

sys/vm/vm_page.c

@@ -319,25 +319,6 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
 	return (vaddr);
 }
 
-/*
- *	vm_page_hash:
- *
- *	Distributes the object/offset key pair among hash buckets.
- *
- *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
- *	This routine may not block.
- *
- *	We try to randomize the hash based on the object to spread the pages
- *	out in the hash table without it costing us too much.
- */
-static __inline int
-vm_page_hash(vm_object_t object, vm_pindex_t pindex)
-{
-	int i = ((uintptr_t)object + pindex) ^ object->hash_rand;
-
-	return (i & vm_page_hash_mask);
-}
-
 void
 vm_page_flag_set(vm_page_t m, unsigned short bits)
 {
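
For context on what is being deleted here: the old scheme hashed the
(object, pindex) pair into one system-wide bucket array, and every lookup,
insert, and remove serialized on the global vm_page_buckets_mtx spin lock.
A stand-alone toy illustration of the removed hash computation (all values
invented):

#include <stdint.h>
#include <stdio.h>

#define	VM_PAGE_HASH_MASK	1023	/* bucket count 1024, a power of 2 */

int
main(void)
{
	uintptr_t object = 0xc0ffee00;	/* invented object address */
	uintptr_t pindex = 42;		/* invented page index */
	uintptr_t hash_rand = 0x5a5a;	/* invented per-object randomizer */
	uintptr_t i = (object + pindex) ^ hash_rand;

	printf("bucket %lu\n", (unsigned long)(i & VM_PAGE_HASH_MASK));
	return (0);
}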
@@ -575,6 +556,63 @@ vm_page_undirty(vm_page_t m)
 	m->dirty = 0;
 }
 
+/*
+ *	vm_page_splay:
+ *
+ *	Implements Sleator and Tarjan's top-down splay algorithm.  Returns
+ *	the vm_page containing the given pindex.  If, however, that
+ *	pindex is not found in the vm_object, returns a vm_page that is
+ *	adjacent to the pindex, coming before or after it.
+ */
+static vm_page_t
+vm_page_splay(vm_pindex_t pindex, vm_page_t root)
+{
+	struct vm_page dummy;
+	vm_page_t lefttreemax, righttreemin, y;
+
+	if (root == NULL)
+		return (root);
+	lefttreemax = righttreemin = &dummy;
+	for (;; root = y) {
+		if (pindex < root->pindex) {
+			if ((y = root->left) == NULL)
+				break;
+			if (pindex < y->pindex) {
+				/* Rotate right. */
+				root->left = y->right;
+				y->right = root;
+				root = y;
+				if ((y = root->left) == NULL)
+					break;
+			}
+			/* Link into the new root's right tree. */
+			righttreemin->left = root;
+			righttreemin = root;
+		} else if (pindex > root->pindex) {
+			if ((y = root->right) == NULL)
+				break;
+			if (pindex > y->pindex) {
+				/* Rotate left. */
+				root->right = y->left;
+				y->left = root;
+				root = y;
+				if ((y = root->right) == NULL)
+					break;
+			}
+			/* Link into the new root's left tree. */
+			lefttreemax->right = root;
+			lefttreemax = root;
+		} else
+			break;
+	}
+	/* Assemble the new root. */
+	lefttreemax->right = root->left;
+	righttreemin->left = root->right;
+	root->left = dummy.right;
+	root->right = dummy.left;
+	return (root);
+}
+
 /*
  *	vm_page_insert:		[ internal use only ]
  *
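
A note on the contract of vm_page_splay above: the caller must always store
the return value back as the object's root, and a miss hands back the
pindex's in-order neighbor rather than NULL.  A stand-alone, hypothetical
harness (the same algorithm on a reduced node type; not part of the commit)
that checks both behaviors:

#include <assert.h>
#include <stdio.h>

struct node {
	unsigned long pindex;
	struct node *left, *right;
};

/* Same top-down splay as vm_page_splay, on the reduced node type. */
static struct node *
splay(unsigned long pindex, struct node *root)
{
	struct node dummy = { 0, NULL, NULL };
	struct node *lmax = &dummy, *rmin = &dummy, *y;

	if (root == NULL)
		return (root);
	for (;; root = y) {
		if (pindex < root->pindex) {
			if ((y = root->left) == NULL)
				break;
			if (pindex < y->pindex) {	/* rotate right */
				root->left = y->right;
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			rmin->left = root;		/* link right */
			rmin = root;
		} else if (pindex > root->pindex) {
			if ((y = root->right) == NULL)
				break;
			if (pindex > y->pindex) {	/* rotate left */
				root->right = y->left;
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			lmax->right = root;		/* link left */
			lmax = root;
		} else
			break;
	}
	lmax->right = root->left;	/* assemble the new root */
	rmin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}

int
main(void)
{
	struct node a = { 1, NULL, NULL };
	struct node b = { 5, NULL, NULL };
	struct node c = { 9, NULL, NULL };
	struct node *root;

	b.left = &a;			/* tree: 5 with children 1 and 9 */
	b.right = &c;
	root = splay(5, &b);		/* exact hit: 5 becomes the root */
	assert(root->pindex == 5);
	root = splay(7, root);		/* miss: a neighbor of 7 comes up */
	assert(root->pindex == 5 || root->pindex == 9);
	printf("root after splay(7): %lu\n", root->pindex);
	return (0);
}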
@@ -591,7 +629,7 @@ vm_page_undirty(vm_page_t m)
 void
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
-	struct vm_page **bucket;
+	vm_page_t root;
 
 	GIANT_REQUIRED;
@@ -605,18 +643,25 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 	m->pindex = pindex;
 
 	/*
-	 * Insert it into the object_object/offset hash table
+	 * Now link into the object's ordered list of backed pages.
 	 */
-	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
-	mtx_lock_spin(&vm_page_buckets_mtx);
-	m->hnext = *bucket;
-	*bucket = m;
-	mtx_unlock_spin(&vm_page_buckets_mtx);
-
-	/*
-	 * Now link into the object's list of backed pages.
-	 */
-	TAILQ_INSERT_TAIL(&object->memq, m, listq);
+	root = vm_page_splay(pindex, object->root);
+	if (root == NULL) {
+		m->left = NULL;
+		m->right = NULL;
+		TAILQ_INSERT_TAIL(&object->memq, m, listq);
+	} else if (pindex < root->pindex) {
+		m->left = root->left;
+		m->right = root;
+		root->left = NULL;
+		TAILQ_INSERT_BEFORE(root, m, listq);
+	} else {
+		m->right = root->right;
+		m->left = root;
+		root->right = NULL;
+		TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
+	}
+	object->root = m;
 
 	object->generation++;
 
 	/*
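
Why the TAILQ insertions in this hunk are O(1): vm_page_splay() has just
been called with the new page's pindex, so the returned root is either the
in-order predecessor or the in-order successor of m in the object's page
ordering.  Inserting before or after it keeps memq sorted without walking
the list, and m can be hung directly above the old root.  A reduced,
hypothetical sketch of just the tree part (reusing struct node from the
harness above):

/* Assumes "root" was already splayed to m->pindex; returns the new root. */
static struct node *
tree_insert(struct node *m, struct node *root)
{
	if (root == NULL)
		m->left = m->right = NULL;
	else if (m->pindex < root->pindex) {
		m->left = root->left;	/* take over the left subtree */
		m->right = root;	/* old root is m's successor */
		root->left = NULL;
	} else {
		m->right = root->right;	/* take over the right subtree */
		m->left = root;		/* old root is m's predecessor */
		root->right = NULL;
	}
	return (m);			/* the new page becomes the root */
}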
@@ -648,7 +693,7 @@ void
 vm_page_remove(vm_page_t m)
 {
 	vm_object_t object;
-	vm_page_t *bucket;
+	vm_page_t root;
 
 	GIANT_REQUIRED;
@@ -666,24 +711,18 @@ vm_page_remove(vm_page_t m)
 	object = m->object;
 
 	/*
-	 * Remove from the object_object/offset hash table.  The object
-	 * must be on the hash queue, we will panic if it isn't
-	 */
-	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
-	mtx_lock_spin(&vm_page_buckets_mtx);
-	while (*bucket != m) {
-		if (*bucket == NULL)
-			panic("vm_page_remove(): page not found in hash");
-		bucket = &(*bucket)->hnext;
-	}
-	*bucket = m->hnext;
-	m->hnext = NULL;
-	mtx_unlock_spin(&vm_page_buckets_mtx);
-
-	/*
 	 * Now remove from the object's list of backed pages.
 	 */
+	if (m != object->root)
+		vm_page_splay(m->pindex, object->root);
+	if (m->left == NULL)
+		root = m->right;
+	else {
+		root = vm_page_splay(m->pindex, m->left);
+		root->right = m->right;
+	}
+	object->root = root;
 	TAILQ_REMOVE(&object->memq, m, listq);
 
 	/*
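
Two subtleties in the removal above.  First, the result of the initial
splay is deliberately discarded: splaying on m->pindex moves m itself to
the root (the page is known to be resident), so afterwards m->left and
m->right are simply the two subtrees to join.  Second, splaying the left
subtree on m->pindex raises m's in-order predecessor, which by construction
has no right child, so m's right subtree can be attached there.  A reduced,
hypothetical sketch of the join (again using struct node and splay() from
the harness above):

/* Detach m from the tree rooted at "root"; returns the new root. */
static struct node *
tree_remove(struct node *m, struct node *root)
{
	struct node *nroot;

	if (m != root)
		(void)splay(m->pindex, root);	/* m is now the root */
	if (m->left == NULL)
		nroot = m->right;
	else {
		/* Raise m's predecessor; its right link is free. */
		nroot = splay(m->pindex, m->left);
		nroot->right = m->right;
	}
	return (nroot);
}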
@@ -701,7 +740,7 @@ vm_page_remove(vm_page_t m)
  *	Returns the page associated with the object/offset
  *	pair specified; if none is found, NULL is returned.
  *
- *	The object must be locked.  No side effects.
+ *	The object must be locked.
  *	This routine may not block.
  *	This is a critical path routine
  */
@@ -709,17 +748,12 @@ vm_page_t
 vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
 {
 	vm_page_t m;
-	struct vm_page **bucket;
 
-	/*
-	 * Search the hash table for this object/offset pair
-	 */
-	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
-	mtx_lock_spin(&vm_page_buckets_mtx);
-	for (m = *bucket; m != NULL; m = m->hnext)
-		if (m->object == object && m->pindex == pindex)
-			break;
-	mtx_unlock_spin(&vm_page_buckets_mtx);
-
+	GIANT_REQUIRED;
+	m = vm_page_splay(pindex, object->root);
+	if ((object->root = m) != NULL && m->pindex != pindex)
+		m = NULL;
 	return (m);
 }
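
Note that lookup is no longer free of side effects, which is why the hunk
above drops "No side effects." from the comment: every vm_page_lookup()
splays and stores the result as object->root, even on a miss, so it mutates
the tree and needs the exclusive lock (hence the GIANT_REQUIRED).  The
payoff is locality, as in this hypothetical helper (not part of the
commit):

static vm_page_t
vm_page_lookup_nearby(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	m = vm_page_lookup(object, pindex);	/* splays pindex to the root */
	if (m == NULL)				/* even a miss caches a neighbor */
		m = vm_page_lookup(object, pindex + 1);
	return (m);
}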

sys/vm/vm_page.h

@@ -110,8 +110,9 @@ TAILQ_HEAD(pglist, vm_page);
 struct vm_page {
 	TAILQ_ENTRY(vm_page) pageq;	/* queue info for FIFO queue or free list (P) */
-	struct vm_page *hnext;		/* hash table link (O,P) */
 	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) */
+	struct vm_page *left;		/* splay tree link (O) */
+	struct vm_page *right;		/* splay tree link (O) */
 	vm_object_t object;		/* which object am I in (O,P)*/
 	vm_pindex_t pindex;		/* offset into object (O,P) */