Refactor the code that performs physically contiguous memory allocation,

yielding a new public interface, vm_page_alloc_contig().  This new function
addresses some of the limitations of the current interfaces, contigmalloc()
and kmem_alloc_contig().  For example, the physically contiguous memory that
is allocated with those interfaces can only be allocated to the kernel vm
object and must be mapped into the kernel virtual address space.  It also
provides functionality that vm_phys_alloc_contig() doesn't, such as wiring
the returned pages.  Moreover, unlike that function, it respects the low
water marks on the paging queues and wakes up the page daemon when
necessary.  That said, at present, this new function can't be applied to all
types of vm objects.  However, that restriction will be eliminated in the
coming weeks.

From a design standpoint, this change also addresses an inconsistency
between vm_phys_alloc_contig() and the other vm_phys_alloc*() functions.
Specifically, vm_phys_alloc_contig() manipulated vm_page fields that other
functions in vm/vm_phys.c didn't.  Moreover, vm_phys_alloc_contig() knew
about vnodes and reservations.  Now, vm_page_alloc_contig() is responsible
for these things.

Reviewed by:	kib
Discussed with:	jhb
This commit is contained in:
Alan Cox 2011-11-16 16:46:09 +00:00
parent d1c5fc763a
commit fbd80bd047
6 changed files with 235 additions and 120 deletions

View File

@ -40,7 +40,6 @@
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_phys.h>
#include <machine/md_var.h>
#include <machine/platform.h>
@ -488,15 +487,22 @@ slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
static vm_offset_t realmax = 0;
void *va;
vm_page_t m;
int pflags;
if (realmax == 0)
realmax = platform_real_maxaddr();
*flags = UMA_SLAB_PRIV;
if ((wait & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
else
pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
if (wait & M_ZERO)
pflags |= VM_ALLOC_ZERO;
for (;;) {
m = vm_phys_alloc_contig(1, 0, realmax, PAGE_SIZE,
PAGE_SIZE);
m = vm_page_alloc_contig(NULL, 0, pflags, 1, 0, realmax,
PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT);
if (m == NULL) {
if (wait & M_NOWAIT)
return (NULL);
@ -513,10 +519,6 @@ slb_uma_real_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
bzero(va, PAGE_SIZE);
/* vm_phys_alloc_contig does not track wiring */
atomic_add_int(&cnt.v_wire_count, 1);
m->wire_count = 1;
return (va);
}

View File

@ -82,7 +82,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_extern.h>
static int
@ -184,22 +183,6 @@ vm_contig_launder(int queue, vm_paddr_t low, vm_paddr_t high)
return (FALSE);
}
/*
 * Frees a run of physically contiguous pages, starting at "m" and
 * extending for "count" pages, one page at a time.
 *
 * N.B.: PG_ZERO is passed through untouched, so any page that still
 * carries PG_ZERO must, in fact, be zero filled.
 */
static void
vm_page_release_contig(vm_page_t m, vm_pindex_t count)
{

	/* Walk the run front to back; PG_ZERO is deliberately left alone. */
	for (; count != 0; count--, m++)
		vm_page_free_toq(m);
}
/*
* Increase the number of cached pages.
*/
@ -238,9 +221,10 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
vm_paddr_t high, vm_memattr_t memattr)
{
vm_object_t object = kernel_object;
vm_offset_t addr, i, offset;
vm_offset_t addr;
vm_ooffset_t end_offset, offset;
vm_page_t m;
int tries;
int pflags, tries;
size = round_page(size);
vm_map_lock(map);
@ -252,11 +236,19 @@ kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
vm_object_reference(object);
vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
VM_PROT_ALL, 0);
if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
else
pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
if (flags & M_ZERO)
pflags |= VM_ALLOC_ZERO;
VM_OBJECT_LOCK(object);
for (i = 0; i < size; i += PAGE_SIZE) {
end_offset = offset + size;
for (; offset < end_offset; offset += PAGE_SIZE) {
tries = 0;
retry:
m = vm_phys_alloc_contig(1, low, high, PAGE_SIZE, 0);
m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
low, high, PAGE_SIZE, 0, memattr);
if (m == NULL) {
VM_OBJECT_UNLOCK(object);
if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
@ -277,9 +269,6 @@ retry:
vm_map_unlock(map);
return (0);
}
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
vm_page_insert(m, object, OFF_TO_IDX(offset + i));
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
@ -299,65 +288,61 @@ retry:
* specified through the given flags, then the pages are zeroed
* before they are mapped.
*/
static vm_offset_t
contigmapping(vm_map_t map, vm_size_t size, vm_page_t m, vm_memattr_t memattr,
    int flags)
{
	vm_object_t kobj;
	vm_offset_t base, limit, va;
	int want_zero;

	kobj = kernel_object;
	want_zero = (flags & M_ZERO) != 0;

	/* Reserve "size" bytes of address space in "map"; 0 means failure. */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, &base) != 0) {
		vm_map_unlock(map);
		return (0);
	}
	limit = base + size;

	/* Back the new range with the kernel object at the matching offset. */
	vm_object_reference(kobj);
	vm_map_insert(map, kobj, base - VM_MIN_KERNEL_ADDRESS, base, limit,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);

	/*
	 * Hand the pages to the object one at a time, in address order,
	 * applying the memory attribute and zero filling where requested.
	 */
	VM_OBJECT_LOCK(kobj);
	for (va = base; va < limit; va += PAGE_SIZE, m++) {
		if (memattr != VM_MEMATTR_DEFAULT)
			pmap_page_set_memattr(m, memattr);
		vm_page_insert(m, kobj,
		    OFF_TO_IDX(va - VM_MIN_KERNEL_ADDRESS));
		/* Pages already flagged PG_ZERO need not be zeroed again. */
		if (want_zero && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
	}
	VM_OBJECT_UNLOCK(kobj);

	vm_map_wire(map, base, limit,
	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
	return (base);
}
vm_offset_t
kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
vm_memattr_t memattr)
{
vm_offset_t ret;
vm_page_t pages;
u_long npgs;
int tries;
vm_object_t object = kernel_object;
vm_offset_t addr;
vm_ooffset_t offset;
vm_page_t end_m, m;
int pflags, tries;
size = round_page(size);
npgs = size >> PAGE_SHIFT;
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
return (0);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(object);
vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
VM_PROT_ALL, 0);
if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
else
pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
if (flags & M_ZERO)
pflags |= VM_ALLOC_ZERO;
VM_OBJECT_LOCK(object);
tries = 0;
retry:
pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
if (pages == NULL) {
m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
atop(size), low, high, alignment, boundary, memattr);
if (m == NULL) {
VM_OBJECT_UNLOCK(object);
if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
vm_map_unlock(map);
vm_contig_grow_cache(tries, low, high);
vm_map_lock(map);
VM_OBJECT_LOCK(object);
tries++;
goto retry;
}
ret = 0;
} else {
ret = contigmapping(map, size, pages, memattr, flags);
if (ret == 0)
vm_page_release_contig(pages, npgs);
vm_map_delete(map, addr, addr + size);
vm_map_unlock(map);
return (0);
}
return (ret);
end_m = m + atop(size);
for (; m < end_m; m++) {
if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
m->valid = VM_PAGE_BITS_ALL;
}
VM_OBJECT_UNLOCK(object);
vm_map_unlock(map);
vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
VM_MAP_WIRE_NOHOLES);
return (addr);
}

View File

@ -137,6 +137,7 @@ SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
static uma_zone_t fakepg_zone;
static struct vnode *vm_page_alloc_init(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_queue_remove(int queue, vm_page_t m);
static void vm_page_enqueue(int queue, vm_page_t m);
@ -1480,6 +1481,155 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
return (m);
}
/*
* vm_page_alloc_contig:
*
* Allocate a contiguous set of physical pages of the given size "npages"
* from the free lists. All of the physical pages must be at or above
* the given physical address "low" and below the given physical address
* "high". The given value "alignment" determines the alignment of the
* first physical page in the set. If the given value "boundary" is
* non-zero, then the set of physical pages cannot cross any physical
* address boundary that is a multiple of that value. Both "alignment"
* and "boundary" must be a power of two.
*
* If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT,
* then the memory attribute setting for the physical pages is configured
* to the object's memory attribute setting. Otherwise, the memory
* attribute setting for the physical pages is configured to "memattr",
* overriding the object's memory attribute setting. However, if the
* object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the
* memory attribute setting for the physical pages cannot be configured
* to VM_MEMATTR_DEFAULT.
*
* The caller must always specify an allocation class.
*
* allocation classes:
* VM_ALLOC_NORMAL normal process request
* VM_ALLOC_SYSTEM system *really* needs a page
* VM_ALLOC_INTERRUPT interrupt time request
*
* optional allocation flags:
* VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
* VM_ALLOC_NOOBJ page is not associated with an object and
* should not have the flag VPO_BUSY set
* VM_ALLOC_WIRED wire the allocated page
* VM_ALLOC_ZERO prefer a zeroed page
*
* If "object" is non-NULL, it must be locked by the caller and the pages
* are inserted into it at consecutive indices starting at "pindex".
* Otherwise, VM_ALLOC_NOOBJ must be specified and the consecutive indices
* are merely recorded in the pages.
*
* Returns the first page of the contiguous set, or NULL if the request
* would take the free and cache page counts below the threshold for the
* allocation class or if no suitable contiguous run could be found.
*
* This routine may not sleep.
*/
vm_page_t
vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
struct vnode *drop;
vm_page_t deferred_vdrop_list, m, m_ret;
u_int flags, oflags;
int req_class;
/*
* An object must be given exactly when VM_ALLOC_NOOBJ is not requested.
*/
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
("vm_page_alloc_contig: inconsistent object/req"));
if (object != NULL) {
/* At present, only locked OBJT_PHYS objects are accepted. */
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
KASSERT(object->type == OBJT_PHYS,
("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
object));
}
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
req_class = req & VM_ALLOC_CLASS_MASK;
/*
* The page daemon is allowed to dig deeper into the free page list.
*/
if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
req_class = VM_ALLOC_SYSTEM;
deferred_vdrop_list = NULL;
mtx_lock(&vm_page_queue_free_mtx);
/*
* Attempt the allocation only if the free and cache page counts can
* cover "npages" plus the amount held back for the allocation class:
* v_free_reserved for any class, v_interrupt_free_min for
* VM_ALLOC_SYSTEM, and nothing for VM_ALLOC_INTERRUPT.
*/
if (cnt.v_free_count + cnt.v_cache_count >= npages +
cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
cnt.v_free_count + cnt.v_cache_count >= npages +
cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
cnt.v_free_count + cnt.v_cache_count >= npages)) {
#if VM_NRESERVLEVEL > 0
retry:
#endif
m_ret = vm_phys_alloc_contig(npages, low, high, alignment,
boundary);
} else {
/*
* Too few pages: record the shortfall in vm_pageout_deficit, wake
* the page daemon, and fail without touching the free lists.
*/
mtx_unlock(&vm_page_queue_free_mtx);
atomic_add_int(&vm_pageout_deficit, npages);
pagedaemon_wakeup();
return (NULL);
}
/*
* On success, run vm_page_alloc_init() on every page while the free
* page queue mutex is still held. Any vnode that it returns is only
* remembered here; vdrop() is called on it after the mutex has been
* released.
*/
if (m_ret != NULL)
for (m = m_ret; m < &m_ret[npages]; m++) {
drop = vm_page_alloc_init(m);
if (drop != NULL) {
/*
* Enqueue the vnode for deferred vdrop().
*
* Once the pages are removed from the free
* page list, "pageq" can be safely abused to
* construct a short-lived list of vnodes.
*/
m->pageq.tqe_prev = (void *)drop;
m->pageq.tqe_next = deferred_vdrop_list;
deferred_vdrop_list = m;
}
}
else {
#if VM_NRESERVLEVEL > 0
/*
* With reservations compiled in, retry the physical allocation
* whenever vm_reserv_reclaim_contig() returns non-zero.
* NOTE(review): presumably non-zero means that a reservation
* covering a suitable range was broken up -- confirm against
* vm_reserv.c.
*/
if (vm_reserv_reclaim_contig(npages << PAGE_SHIFT, low, high,
alignment, boundary))
goto retry;
#endif
}
mtx_unlock(&vm_page_queue_free_mtx);
if (m_ret == NULL)
return (NULL);
/*
* Initialize the pages. Only the PG_ZERO flag is inherited, and only
* when the caller asked for VM_ALLOC_ZERO; otherwise every bit in
* "flags" is cleared.
*/
flags = 0;
if ((req & VM_ALLOC_ZERO) != 0)
flags = PG_ZERO;
/* The global wire count is updated once for the whole set. */
if ((req & VM_ALLOC_WIRED) != 0)
atomic_add_int(&cnt.v_wire_count, npages);
/*
* Every page is marked VPO_UNMANAGED. VPO_BUSY is added only for
* pages that are inserted into an object without VM_ALLOC_NOBUSY.
*/
oflags = VPO_UNMANAGED;
if (object != NULL) {
if ((req & VM_ALLOC_NOBUSY) == 0)
oflags |= VPO_BUSY;
/*
* An explicit "memattr" wins; VM_MEMATTR_DEFAULT falls back to
* the object's setting when that setting is not the default.
*/
if (object->memattr != VM_MEMATTR_DEFAULT &&
memattr == VM_MEMATTR_DEFAULT)
memattr = object->memattr;
}
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
m->flags &= flags;
if ((req & VM_ALLOC_WIRED) != 0)
m->wire_count = 1;
/* Unmanaged pages don't use "act_count". */
m->oflags = oflags;
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
/*
* The pages receive consecutive indices: either by insertion into
* the object or, without an object, by storing the index directly.
*/
if (object != NULL)
vm_page_insert(m, object, pindex);
else
m->pindex = pindex;
pindex++;
}
/*
* Release the vnodes that were queued above, now that the free page
* queue mutex is no longer held.
*/
while (deferred_vdrop_list != NULL) {
vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
}
/* Wake the page daemon if vm_paging_needed() reports that paging is due. */
if (vm_paging_needed())
pagedaemon_wakeup();
return (m_ret);
}
/*
* Initialize a page that has been freshly dequeued from a freelist.
* The caller has to drop the vnode returned, if it is not NULL.
@ -1488,7 +1638,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
*
* To be called with vm_page_queue_free_mtx held.
*/
struct vnode *
static struct vnode *
vm_page_alloc_init(vm_page_t m)
{
struct vnode *drop;
@ -1529,9 +1679,6 @@ vm_page_alloc_init(vm_page_t m)
}
/* Don't clear the PG_ZERO flag; we'll need it later. */
m->flags &= PG_ZERO;
m->aflags = 0;
m->oflags = VPO_UNMANAGED;
/* Unmanaged pages don't use "act_count". */
return (drop);
}
@ -1598,6 +1745,7 @@ vm_page_alloc_freelist(int flind, int req)
/*
* Initialize the page. Only the PG_ZERO flag is inherited.
*/
m->aflags = 0;
flags = 0;
if ((req & VM_ALLOC_ZERO) != 0)
flags = PG_ZERO;
@ -1610,6 +1758,8 @@ vm_page_alloc_freelist(int flind, int req)
atomic_add_int(&cnt.v_wire_count, 1);
m->wire_count = 1;
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
if (drop != NULL)
vdrop(drop);
if (vm_paging_needed())

View File

@ -359,8 +359,10 @@ void vm_pageq_remove(vm_page_t m);
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_freelist(int, int);
struct vnode *vm_page_alloc_init(vm_page_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache(vm_page_t);
void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);

View File

@ -29,11 +29,17 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Physical memory system implementation
*
* Any external functions defined by this module are only to be used by the
* virtual memory system.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ddb.h"
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -45,7 +51,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <ddb/ddb.h>
@ -55,7 +60,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>
/*
* VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
@ -755,12 +759,12 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
struct vnode *vp;
vm_paddr_t pa, pa_last, size;
vm_page_t deferred_vdrop_list, m, m_ret;
vm_page_t m, m_ret;
u_long npages_end;
int domain, flind, i, oind, order, pind;
int domain, flind, oind, order, pind;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
domain = PCPU_GET(domain);
#else
@ -773,13 +777,8 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
("vm_phys_alloc_contig: alignment must be a power of 2"));
KASSERT((boundary & (boundary - 1)) == 0,
("vm_phys_alloc_contig: boundary must be a power of 2"));
deferred_vdrop_list = NULL;
/* Compute the queue that is the best fit for npages. */
for (order = 0; (1 << order) < npages; order++);
mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
for (flind = 0; flind < vm_nfreelists; flind++) {
for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
for (pind = 0; pind < VM_NFREEPOOL; pind++) {
@ -838,11 +837,6 @@ retry:
}
}
}
#if VM_NRESERVLEVEL > 0
if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
goto retry;
#endif
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
done:
for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
@ -855,31 +849,10 @@ done:
vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
fl = (*seg->free_queues)[m_ret->pool];
vm_phys_split_pages(m_ret, oind, fl, order);
for (i = 0; i < npages; i++) {
m = &m_ret[i];
vp = vm_page_alloc_init(m);
if (vp != NULL) {
/*
* Enqueue the vnode for deferred vdrop().
*
* Unmanaged pages don't use "pageq", so it
* can be safely abused to construct a short-
* lived queue of vnodes.
*/
m->pageq.tqe_prev = (void *)vp;
m->pageq.tqe_next = deferred_vdrop_list;
deferred_vdrop_list = m;
}
}
/* Return excess pages to the free lists. */
npages_end = roundup2(npages, 1 << imin(oind, order));
if (npages < npages_end)
vm_phys_free_contig(&m_ret[npages], npages_end - npages);
mtx_unlock(&vm_page_queue_free_mtx);
while (deferred_vdrop_list != NULL) {
vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
}
return (m_ret);
}

View File

@ -49,6 +49,9 @@ struct mem_affinity {
extern struct mem_affinity *mem_affinity;
/*
* The following functions are only to be used by the virtual memory system.
*/
void vm_phys_add_page(vm_paddr_t pa);
vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
u_long alignment, vm_paddr_t boundary);