From 482b6818afd4db4c0be3027d637df4d534bc9eee Mon Sep 17 00:00:00 2001 From: alc Date: Fri, 13 Aug 2004 08:06:34 +0000 Subject: [PATCH] Replace the linear search in vm_map_findspace() with an O(log n) algorithm built into the map entry splay tree. This replaces the first_free hint in struct vm_map with two fields in vm_map_entry: adj_free, the amount of free space following a map entry, and max_free, the maximum amount of free space in the entry's subtree. These fields make it possible to find a first-fit free region of a given size in one pass down the tree, so O(log n) amortized using splay trees. This significantly reduces the overhead in vm_map_findspace() for applications that mmap() many hundreds or thousands of regions, and has a negligible slowdown (0.1%) on buildworld. See, for example, the discussion of a micro-benchmark titled "Some mmap observations compared to Linux 2.6/OpenBSD" on -hackers in late October 2003. OpenBSD adopted this approach in March 2002, and NetBSD added it in November 2003, both with Red-Black trees. Submitted by: Mark W. Krentel --- sys/vm/vm_map.c | 316 ++++++++++++++++++++++++++++++++---------------- sys/vm/vm_map.h | 3 +- 2 files changed, 217 insertions(+), 102 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 55eaef76370b..067615506f7d 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -514,7 +514,6 @@ _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) map->system_map = 0; map->min_offset = min; map->max_offset = max; - map->first_free = &map->header; map->flags = 0; map->root = NULL; map->timestamp = 0; @@ -572,60 +571,130 @@ vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior) (behavior & MAP_ENTRY_BEHAV_MASK); } +/* + * vm_map_entry_set_max_free: + * + * Set the max_free field in a vm_map_entry. + */ +static __inline void +vm_map_entry_set_max_free(vm_map_entry_t entry) +{ + + entry->max_free = entry->adj_free; + if (entry->left != NULL && entry->left->max_free > entry->max_free) + entry->max_free = entry->left->max_free; + if (entry->right != NULL && entry->right->max_free > entry->max_free) + entry->max_free = entry->right->max_free; +} + /* * vm_map_entry_splay: * - * Implements Sleator and Tarjan's top-down splay algorithm. Returns - * the vm_map_entry containing the given address. If, however, that - * address is not found in the vm_map, returns a vm_map_entry that is - * adjacent to the address, coming before or after it. + * The Sleator and Tarjan top-down splay algorithm with the + * following variation. Max_free must be computed bottom-up, so + * on the downward pass, maintain the left and right spines in + * reverse order. Then, make a second pass up each side to fix + * the pointers and compute max_free. The time bound is O(log n) + * amortized. + * + * The new root is the vm_map_entry containing "addr", or else an + * adjacent entry (lower or higher) if addr is not in the tree. + * + * The map must be locked, and leaves it so. + * + * Returns: the new root. */ static vm_map_entry_t -vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root) +vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) { - struct vm_map_entry dummy; - vm_map_entry_t lefttreemax, righttreemin, y; + vm_map_entry_t llist, rlist; + vm_map_entry_t ltree, rtree; + vm_map_entry_t y; + /* Special case of empty tree. */ if (root == NULL) return (root); - lefttreemax = righttreemin = &dummy; - for (;; root = y) { - if (address < root->start) { - if ((y = root->left) == NULL) + + /* + * Pass One: Splay down the tree until we find addr or a NULL + * pointer where addr would go. llist and rlist are the two + * sides in reverse order (bottom-up), with llist linked by + * the right pointer and rlist linked by the left pointer in + * the vm_map_entry. Wait until Pass Two to set max_free on + * the two spines. + */ + llist = NULL; + rlist = NULL; + for (;;) { + /* root is never NULL in here. */ + if (addr < root->start) { + y = root->left; + if (y == NULL) break; - if (address < y->start) { - /* Rotate right. */ + if (addr < y->start && y->left != NULL) { + /* Rotate right and put y on rlist. */ root->left = y->right; y->right = root; + vm_map_entry_set_max_free(root); + root = y->left; + y->left = rlist; + rlist = y; + } else { + /* Put root on rlist. */ + root->left = rlist; + rlist = root; root = y; - if ((y = root->left) == NULL) - break; } - /* Link into the new root's right tree. */ - righttreemin->left = root; - righttreemin = root; - } else if (address >= root->end) { - if ((y = root->right) == NULL) + } else { + y = root->right; + if (addr < root->end || y == NULL) break; - if (address >= y->end) { - /* Rotate left. */ + if (addr >= y->end && y->right != NULL) { + /* Rotate left and put y on llist. */ root->right = y->left; y->left = root; + vm_map_entry_set_max_free(root); + root = y->right; + y->right = llist; + llist = y; + } else { + /* Put root on llist. */ + root->right = llist; + llist = root; root = y; - if ((y = root->right) == NULL) - break; } - /* Link into the new root's left tree. */ - lefttreemax->right = root; - lefttreemax = root; - } else - break; + } } - /* Assemble the new root. */ - lefttreemax->right = root->left; - righttreemin->left = root->right; - root->left = dummy.right; - root->right = dummy.left; + + /* + * Pass Two: Walk back up the two spines, flip the pointers + * and set max_free. The subtrees of the root go at the + * bottom of llist and rlist. + */ + ltree = root->left; + while (llist != NULL) { + y = llist->right; + llist->right = ltree; + vm_map_entry_set_max_free(llist); + ltree = llist; + llist = y; + } + rtree = root->right; + while (rlist != NULL) { + y = rlist->left; + rlist->left = rtree; + vm_map_entry_set_max_free(rlist); + rtree = rlist; + rlist = y; + } + + /* + * Final assembly: add ltree and rtree as subtrees of root. + */ + root->left = ltree; + root->right = rtree; + vm_map_entry_set_max_free(root); + return (root); } @@ -655,10 +724,15 @@ vm_map_entry_link(vm_map_t map, entry->right = after_where->right; entry->left = after_where; after_where->right = NULL; + after_where->adj_free = entry->start - after_where->end; + vm_map_entry_set_max_free(after_where); } else { entry->right = map->root; entry->left = NULL; } + entry->adj_free = (entry->next == &map->header ? map->max_offset : + entry->next->start) - entry->end; + vm_map_entry_set_max_free(entry); map->root = entry; } @@ -675,6 +749,9 @@ vm_map_entry_unlink(vm_map_t map, else { root = vm_map_entry_splay(entry->start, entry->left); root->right = entry->right; + root->adj_free = (entry->next == &map->header ? map->max_offset : + entry->next->start) - root->end; + vm_map_entry_set_max_free(root); } map->root = root; @@ -687,6 +764,33 @@ vm_map_entry_unlink(vm_map_t map, map->nentries, entry); } +/* + * vm_map_entry_resize_free: + * + * Recompute the amount of free space following a vm_map_entry + * and propagate that value up the tree. Call this function after + * resizing a map entry in-place, that is, without a call to + * vm_map_entry_link() or _unlink(). + * + * The map must be locked, and leaves it so. + */ +static void +vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) +{ + + /* + * Using splay trees without parent pointers, propagating + * max_free up the tree is done by moving the entry to the + * root and making the change there. + */ + if (entry != map->root) + map->root = vm_map_entry_splay(entry->start, map->root); + + entry->adj_free = (entry->next == &map->header ? map->max_offset : + entry->next->start) - entry->end; + vm_map_entry_set_max_free(entry); +} + /* * vm_map_lookup_entry: [ internal use only ] * @@ -814,6 +918,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, (prev_entry->max_protection == max)) { map->size += (end - prev_entry->end); prev_entry->end = end; + vm_map_entry_resize_free(map, prev_entry); vm_map_simplify_entry(map, prev_entry); return (KERN_SUCCESS); } @@ -859,14 +964,6 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_map_entry_link(map, prev_entry, new_entry); map->size += new_entry->end - new_entry->start; - /* - * Update the free space hint - */ - if ((map->first_free == prev_entry) && - (prev_entry->end >= new_entry->start)) { - map->first_free = new_entry; - } - #if 0 /* * Temporarily removed to avoid MAP_STACK panic, due to @@ -889,64 +986,90 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } /* - * Find sufficient space for `length' bytes in the given map, starting at - * `start'. The map must be locked. Returns 0 on success, 1 on no space. + * vm_map_findspace: + * + * Find the first fit (lowest VM address) for "length" free bytes + * beginning at address >= start in the given map. + * + * In a vm_map_entry, "adj_free" is the amount of free space + * adjacent (higher address) to this entry, and "max_free" is the + * maximum amount of contiguous free space in its subtree. This + * allows finding a free region in one path down the tree, so + * O(log n) amortized with splay trees. + * + * The map must be locked, and leaves it so. + * + * Returns: 0 on success, and starting address in *addr, + * 1 if insufficient space. */ int -vm_map_findspace( - vm_map_t map, - vm_offset_t start, - vm_size_t length, - vm_offset_t *addr) +vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, + vm_offset_t *addr) /* OUT */ { - vm_map_entry_t entry, next; - vm_offset_t end; + vm_map_entry_t entry; + vm_offset_t end, st; + /* Request must fit within min/max VM address. */ if (start < map->min_offset) start = map->min_offset; - if (start > map->max_offset) + if (start + length > map->max_offset) + return (1); + + /* Empty tree means wide open address space. */ + if (map->root == NULL) { + *addr = start; + goto found; + } + + /* + * After splay, if start comes before root node, then there + * must be a gap from start to the root. + */ + map->root = vm_map_entry_splay(start, map->root); + if (start + length <= map->root->start) { + *addr = start; + goto found; + } + + /* + * Root is the last node that might begin its gap before + * start. + */ + st = (start > map->root->end) ? start : map->root->end; + if (st + length <= map->root->end + map->root->adj_free) { + *addr = st; + goto found; + } + + /* With max_free, can immediately tell if no solution. */ + entry = map->root->right; + if (entry == NULL || length > entry->max_free) return (1); /* - * Look for the first possible address; if there's already something - * at this address, we have to start after it. + * Search the right subtree in the order: left subtree, root, + * right subtree (first fit). The previous splay implies that + * all regions in the right subtree have addresses > start. */ - if (start == map->min_offset) { - if ((entry = map->first_free) != &map->header) - start = entry->end; - } else { - vm_map_entry_t tmp; - - if (vm_map_lookup_entry(map, start, &tmp)) - start = tmp->end; - entry = tmp; + while (entry != NULL) { + if (entry->left != NULL && entry->left->max_free >= length) + entry = entry->left; + else if (entry->adj_free >= length) { + *addr = entry->end; + goto found; + } else + entry = entry->right; } - /* - * Look through the rest of the map, trying to fit a new region in the - * gap between existing regions, or after the very last region. - */ - for (;; start = (entry = next)->end) { - /* - * Find the end of the proposed new region. Be sure we didn't - * go beyond the end of the map, or wrap around the address; - * if so, we lose. Otherwise, if this is the last entry, or - * if the proposed new region fits before the next entry, we - * win. - */ - end = start + length; - if (end > map->max_offset || end < start) - return (1); - next = entry->next; - if (next == &map->header || next->start >= end) - break; - } - *addr = start; + /* Can't get here, so panic if we do. */ + panic("vm_map_findspace: max_free corrupt"); + +found: + /* Expand the kernel pmap, if necessary. */ if (map == kernel_map) { - vm_offset_t ksize; - if ((ksize = round_page(start + length)) > kernel_vm_end) { - pmap_growkernel(ksize); - } + end = round_page(*addr + length); + if (end > kernel_vm_end) + pmap_growkernel(end); } return (0); } @@ -1027,11 +1150,11 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) (prev->max_protection == entry->max_protection) && (prev->inheritance == entry->inheritance) && (prev->wired_count == entry->wired_count)) { - if (map->first_free == prev) - map->first_free = entry; vm_map_entry_unlink(map, prev); entry->start = prev->start; entry->offset = prev->offset; + if (entry->prev != &map->header) + vm_map_entry_resize_free(map, entry->prev); if (prev->object.vm_object) vm_object_deallocate(prev->object.vm_object); vm_map_entry_dispose(map, prev); @@ -1050,10 +1173,9 @@ vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) (next->max_protection == entry->max_protection) && (next->inheritance == entry->inheritance) && (next->wired_count == entry->wired_count)) { - if (map->first_free == next) - map->first_free = entry; vm_map_entry_unlink(map, next); entry->end = next->end; + vm_map_entry_resize_free(map, entry); if (next->object.vm_object) vm_object_deallocate(next->object.vm_object); vm_map_entry_dispose(map, next); @@ -2116,15 +2238,6 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end) vm_map_clip_start(map, entry, start); } - /* - * Save the free space hint - */ - if (entry == &map->header) { - map->first_free = &map->header; - } else if (map->first_free->start >= start) { - map->first_free = entry->prev; - } - /* * Step through all entries in this region */ @@ -2777,6 +2890,7 @@ vm_map_growstack(struct proc *p, vm_offset_t addr) /* Update the current entry. */ stack_entry->end = addr; stack_entry->avail_ssize -= grow_amount; + vm_map_entry_resize_free(map, stack_entry); rv = KERN_SUCCESS; if (next_entry != &map->header) diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 1d79a5183969..e05d4cbcd50b 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -104,6 +104,8 @@ struct vm_map_entry { vm_offset_t start; /* start address */ vm_offset_t end; /* end address */ vm_offset_t avail_ssize; /* amt can grow if this is a stack */ + vm_size_t adj_free; /* amount of adjacent free space */ + vm_size_t max_free; /* max free space in subtree */ union vm_map_object object; /* object I point to */ vm_ooffset_t offset; /* offset into object */ vm_eflags_t eflags; /* map entry flags */ @@ -187,7 +189,6 @@ struct vm_map { u_char system_map; /* Am I a system map? */ vm_flags_t flags; /* flags for this vm_map */ vm_map_entry_t root; /* Root of a binary search tree */ - vm_map_entry_t first_free; /* First free space hint */ pmap_t pmap; /* (c) Physical map */ #define min_offset header.start /* (c) */ #define max_offset header.end /* (c) */