Implement NUMA policy for kmem_*(9).  This maintains compatibility with
reservations by giving each memory domain its own KVA space in vmem that
is naturally aligned on superpage boundaries.

Reviewed by:	alc, markj, kib (some objections)
Sponsored by:	Netflix, Dell/EMC Isilon
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D13289
commit e7c9f84113 (parent f48ea5d543)
jeff, 2018-01-12 23:13:55 +00:00
6 changed files with 197 additions and 52 deletions
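To make the shape of the change easier to follow before the diffs: the sketch below is a small stand-alone user-space model, not kernel code, and every identifier in it (parent_import, domain_alloc, numa_alloc, the 2 MB quantum) is invented for illustration. It models the arena layout the commit sets up: a shared parent hands out whole, naturally aligned KVA quanta; each domain's child arena imports from it on demand; and callers try their preferred domain first and fall back to the others, as the vm_domainset iterator loops in the kmem_* wrappers below do.

/*
 * User-space model of the per-domain arena layout (illustrative only;
 * none of these names are the kernel API).
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NDOMAINS	2
#define MODEL_QUANTUM	(2ULL * 1024 * 1024)	/* stand-in for the superpage import size */

/* Shared parent "arena": hands out whole, naturally aligned quanta. */
static uint64_t parent_cursor = MODEL_QUANTUM;	/* fake, aligned VA space */

static uint64_t
parent_import(void)
{
	uint64_t chunk = parent_cursor;

	parent_cursor += MODEL_QUANTUM;
	return (chunk);
}

/* Per-domain child arena: a bump allocator over imported quanta. */
struct domain_arena {
	uint64_t base;
	size_t	 used;
};

static struct domain_arena arenas[NDOMAINS];

static uint64_t
domain_alloc(int domain, size_t size)
{
	struct domain_arena *a = &arenas[domain];

	if (size == 0 || size > MODEL_QUANTUM)
		return (0);
	if (a->base == 0 || a->used + size > MODEL_QUANTUM) {
		/* Grow by importing another aligned chunk from the parent. */
		a->base = parent_import();
		a->used = 0;
	}
	a->used += size;
	return (a->base + a->used - size);
}

/* Preferred-domain-first policy with round-robin fallback. */
static uint64_t
numa_alloc(int preferred, size_t size)
{
	uint64_t va;
	int i;

	for (i = 0; i < NDOMAINS; i++) {
		va = domain_alloc((preferred + i) % NDOMAINS, size);
		if (va != 0)
			return (va);
	}
	return (0);
}

int
main(void)
{
	printf("domain 0: %#llx\n", (unsigned long long)numa_alloc(0, 4096));
	printf("domain 1: %#llx\n", (unsigned long long)numa_alloc(1, 4096));
	printf("domain 0: %#llx\n", (unsigned long long)numa_alloc(0, 4096));
	return (0);
}

Because each child only ever imports whole, naturally aligned quanta from the parent, the addresses one domain hands out never share a superpage-sized region with another domain's, which is what keeps the per-domain KVA layout compatible with reservations.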

sys/kern/subr_vmem.c

@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
+#include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 
 #define	VMEM_OPTORDER		5
@@ -186,6 +187,7 @@ static struct task vmem_periodic_wk;
 static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
+static uma_zone_t vmem_zone;
 
 /* ---- misc */
 
 #define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
@@ -255,11 +257,11 @@ bt_fill(vmem_t *vm, int flags)
 	VMEM_ASSERT_LOCKED(vm);
 
 	/*
-	 * Only allow the kernel arena to dip into reserve tags.  It is the
-	 * vmem where new tags come from.
+	 * Only allow the kernel arena and arenas derived from kernel arena to
+	 * dip into reserve tags.  They are where new tags come from.
 	 */
 	flags &= BT_FLAGS;
-	if (vm != kernel_arena)
+	if (vm != kernel_arena && vm->vm_arg != kernel_arena)
 		flags &= ~M_USE_RESERVE;
 
 	/*
@@ -615,23 +617,25 @@ static void *
 vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 	vmem_addr_t addr;
+	int domain;
 
 	*pflag = UMA_SLAB_KERNEL;
+	domain = 0;	/* XXX Temporary. */
 
 	/*
 	 * Single thread boundary tag allocation so that the address space
 	 * and memory are added in one atomic operation.
 	 */
 	mtx_lock(&vmem_bt_lock);
-	if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
-	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
-	    &addr) == 0) {
-		if (kmem_back(kernel_object, addr, bytes,
+	if (vmem_xalloc(vm_dom[domain].vmd_kernel_arena, bytes, 0, 0, 0,
+	    VMEM_ADDR_MIN, VMEM_ADDR_MAX,
+	    M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT, &addr) == 0) {
+		if (kmem_back_domain(domain, kernel_object, addr, bytes,
 		    M_NOWAIT | M_USE_RESERVE) == 0) {
 			mtx_unlock(&vmem_bt_lock);
 			return ((void *)addr);
 		}
-		vmem_xfree(kernel_arena, addr, bytes);
+		vmem_xfree(vm_dom[domain].vmd_kernel_arena, addr, bytes);
 		mtx_unlock(&vmem_bt_lock);
 		/*
 		 * Out of memory, not address space.  This may not even be
@@ -657,9 +661,12 @@ vmem_startup(void)
 {
 
 	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
+	vmem_zone = uma_zcreate("vmem",
+	    sizeof(struct vmem), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, UMA_ZONE_VM);
 	vmem_bt_zone = uma_zcreate("vmem btag",
 	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
-	    UMA_ALIGN_PTR, UMA_ZONE_VM);
+	    UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 #ifndef UMA_MD_SMALL_ALLOC
 	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
 	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
@@ -826,7 +833,7 @@ vmem_destroy1(vmem_t *vm)
 	VMEM_CONDVAR_DESTROY(vm);
 	VMEM_LOCK_DESTROY(vm);
-	free(vm, M_VMEM);
+	uma_zfree(vmem_zone, vm);
 }
 
 static int
@@ -1058,7 +1065,7 @@ vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
 	vmem_t *vm;
 
-	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
+	vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
 	if (vm == NULL)
 		return (NULL);
 	if (vmem_init(vm, name, base, size, quantum, qcache_max,

sys/vm/vm_extern.h

@@ -56,14 +56,21 @@ void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t);
 /* These operate on virtual addresses backed by memory. */
 vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
+vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
+vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
 void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
 
 /* This provides memory for previously allocated address space. */
 int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
+int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
 void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);
 
 /* Bootstrapping. */

sys/vm/vm_init.c

@@ -81,16 +81,25 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vmem.h>
+#include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
+#if VM_NRESERVLEVEL > 0
+#define	KVA_QUANTUM	(1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT))
+#else
+/* On non-superpage architectures want large import sizes. */
+#define	KVA_QUANTUM	(PAGE_SIZE * 1024)
+#endif
+
 long physmem;
 
 /*
@@ -107,7 +116,10 @@ kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
 {
 	vm_offset_t addr;
 	int result;
 
+	KASSERT((size % KVA_QUANTUM) == 0,
+	    ("kva_import: Size %jd is not a multiple of %d",
+	    (intmax_t)size, (int)KVA_QUANTUM));
 	addr = vm_map_min(kernel_map);
 	result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
 	    VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
@@ -130,6 +142,7 @@ static void
 vm_mem_init(dummy)
 	void *dummy;
 {
+	int domain;
 
 	/*
 	 * Initializes resident memory structures.  From here on, all physical
@@ -150,13 +163,15 @@ vm_mem_init(dummy)
 	 * Initialize the kernel_arena.  This can grow on demand.
 	 */
 	vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
-	vmem_set_import(kernel_arena, kva_import, NULL, NULL,
-#if VM_NRESERVLEVEL > 0
-	    1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT));
-#else
-	/* On non-superpage architectures want large import sizes. */
-	    PAGE_SIZE * 1024);
-#endif
+	vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
+
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		vm_dom[domain].vmd_kernel_arena = vmem_create(
+		    "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+		vmem_set_import(vm_dom[domain].vmd_kernel_arena,
+		    (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
+		    KVA_QUANTUM);
+	}
 
 	kmem_init_zero_region();
 	pmap_init();

sys/vm/vm_kern.c

@@ -67,9 +67,12 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_vm.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>		/* for ticks and hz */
+#include <sys/domainset.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
@@ -77,15 +80,18 @@ __FBSDID("$FreeBSD$");
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/vmem.h>
+#include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/vm_domainset.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
+#include <vm/vm_phys.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
@@ -161,17 +167,17 @@ kva_free(vm_offset_t addr, vm_size_t size)
  * given flags, then the pages are zeroed before they are mapped.
  */
 vm_offset_t
-kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
+	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, i, offset;
 	vm_page_t m;
 	int pflags, tries;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_alloc_attr: Only kernel_arena is supported."));
 	size = round_page(size);
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -182,13 +188,13 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		tries = 0;
retry:
-		m = vm_page_alloc_contig(object, atop(offset + i),
-		    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
+		m = vm_page_alloc_contig_domain(object, atop(offset + i),
+		    domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr);
 		if (m == NULL) {
 			VM_OBJECT_WUNLOCK(object);
 			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-				if (!vm_page_reclaim_contig(pflags, 1,
-				    low, high, PAGE_SIZE, 0) &&
+				if (!vm_page_reclaim_contig_domain(domain,
+				    pflags, 1, low, high, PAGE_SIZE, 0) &&
 				    (flags & M_WAITOK) != 0)
 					VM_WAIT;
 				VM_OBJECT_WLOCK(object);
@@ -199,6 +205,9 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
 			vmem_free(vmem, addr, size);
 			return (0);
 		}
+		KASSERT(vm_phys_domidx(m) == domain,
+		    ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
+		    vm_phys_domidx(m), domain));
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
@@ -209,6 +218,28 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
 	return (addr);
 }
 
+vm_offset_t
+kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, vm_memattr_t memattr)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_attr: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
+		    memattr);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  * Allocates a region from the kernel address map and physically
  * contiguous pages within the specified address range to the kernel
@@ -218,19 +249,19 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
  * mapped.
  */
 vm_offset_t
-kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
+	vmem_t *vmem;
 	vm_object_t object = kernel_object;
 	vm_offset_t addr, offset, tmp;
 	vm_page_t end_m, m;
 	u_long npages;
 	int pflags, tries;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_alloc_contig: Only kernel_arena is supported."));
 	size = round_page(size);
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -241,13 +272,14 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
 	VM_OBJECT_WLOCK(object);
 	tries = 0;
retry:
-	m = vm_page_alloc_contig(object, atop(offset), pflags,
-	    npages, low, high, alignment, boundary, memattr);
+	m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags,
+	    npages, low, high, alignment, boundary, memattr);
 	if (m == NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-			if (!vm_page_reclaim_contig(pflags, npages, low, high,
-			    alignment, boundary) && (flags & M_WAITOK) != 0)
+			if (!vm_page_reclaim_contig_domain(domain, pflags,
+			    npages, low, high, alignment, boundary) &&
+			    (flags & M_WAITOK) != 0)
 				VM_WAIT;
 			VM_OBJECT_WLOCK(object);
 			tries++;
@@ -256,6 +288,9 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
 		vmem_free(vmem, addr, size);
 		return (0);
 	}
+	KASSERT(vm_phys_domidx(m) == domain,
+	    ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
+	    vm_phys_domidx(m), domain));
 	end_m = m + npages;
 	tmp = addr;
 	for (; m < end_m; m++) {
@@ -270,6 +305,29 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
 	return (addr);
 }
 
+vm_offset_t
+kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_contig: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
+		    alignment, boundary, memattr);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  *	kmem_suballoc:
  *
@@ -313,18 +371,18 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
  *	Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
-kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+kmem_malloc_domain(int domain, vm_size_t size, int flags)
 {
+	vmem_t *vmem;
 	vm_offset_t addr;
 	int rv;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_malloc: Only kernel_arena is supported."));
+	vmem = vm_dom[domain].vmd_kernel_arena;
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 
-	rv = kmem_back(kernel_object, addr, size, flags);
+	rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_free(vmem, addr, size);
 		return (0);
@@ -332,20 +390,41 @@ kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
 	return (addr);
 }
 
+vm_offset_t
+kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+{
+	struct vm_domainset_iter di;
+	vm_offset_t addr;
+	int domain;
+
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_malloc: Only kernel_arena is supported."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		addr = kmem_malloc_domain(domain, size, flags);
+		if (addr != 0)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (addr);
+}
+
 /*
  *	kmem_back:
  *
  *	Allocate physical pages for the specified virtual address range.
  */
 int
-kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
+    vm_size_t size, int flags)
 {
 	vm_offset_t offset, i;
 	vm_page_t m, mpred;
 	int pflags;
 
 	KASSERT(object == kernel_object,
-	    ("kmem_back: only supports kernel object."));
+	    ("kmem_back_domain: only supports kernel object."));
 
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -358,8 +437,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
retry:
 	mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
 	for (; i < size; i += PAGE_SIZE, mpred = m) {
-		m = vm_page_alloc_after(object, atop(offset + i), pflags,
-		    mpred);
+		m = vm_page_alloc_domain_after(object, atop(offset + i),
+		    domain, pflags, mpred);
 
 		/*
 		 * Ran out of space, free everything up and return. Don't need
@@ -373,6 +452,9 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 			kmem_unback(object, addr, i);
 			return (KERN_NO_SPACE);
 		}
+		KASSERT(vm_phys_domidx(m) == domain,
+		    ("kmem_back_domain: Domain mismatch %d != %d",
+		    vm_phys_domidx(m), domain));
 		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
@@ -386,6 +468,26 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 	return (KERN_SUCCESS);
 }
 
+int
+kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+{
+	struct vm_domainset_iter di;
+	int domain;
+	int ret;
+
+	KASSERT(object == kernel_object,
+	    ("kmem_back: only supports kernel object."));
+
+	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
+	do {
+		ret = kmem_back_domain(domain, object, addr, size, flags);
+		if (ret == KERN_SUCCESS)
+			break;
+	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
+
+	return (ret);
+}
+
 /*
  *	kmem_unback:
  *
@@ -395,26 +497,39 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
  *	A physical page must exist within the specified object at each index
  *	that is being unmapped.
  */
-void
-kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+static int
+_kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
 	vm_page_t m, next;
 	vm_offset_t end, offset;
+	int domain;
 
 	KASSERT(object == kernel_object,
 	    ("kmem_unback: only supports kernel object."));
 
+	if (size == 0)
+		return (0);
 	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	end = offset + size;
 	VM_OBJECT_WLOCK(object);
-	for (m = vm_page_lookup(object, atop(offset)); offset < end;
-	    offset += PAGE_SIZE, m = next) {
+	m = vm_page_lookup(object, atop(offset));
+	domain = vm_phys_domidx(m);
+	for (; offset < end; offset += PAGE_SIZE, m = next) {
 		next = vm_page_next(m);
 		vm_page_unwire(m, PQ_NONE);
 		vm_page_free(m);
 	}
 	VM_OBJECT_WUNLOCK(object);
+
+	return (domain);
+}
+
+void
+kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+{
+
+	_kmem_unback(object, addr, size);
 }
 
 /*
@@ -426,12 +541,13 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
+	int domain;
 
 	KASSERT(vmem == kernel_arena,
 	    ("kmem_free: Only kernel_arena is supported."));
 
 	size = round_page(size);
-	kmem_unback(kernel_object, addr, size);
-	vmem_free(vmem, addr, size);
+	domain = _kmem_unback(kernel_object, addr, size);
+	vmem_free(vm_dom[domain].vmd_kernel_arena, addr, size);
 }
 
 /*

sys/vm/vm_phys.h

@@ -101,7 +101,6 @@ int vm_phys_mem_affinity(int f, int t);
 static inline int
 vm_phys_domidx(vm_page_t m)
 {
-#ifdef VM_NUMA_ALLOC
 	int domn, segind;
 
 	/* XXXKIB try to assert that the page is managed */
@@ -110,9 +109,6 @@ vm_phys_domidx(vm_page_t m)
 	domn = vm_phys_segs[segind].domain;
 	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
 	return (domn);
-#else
-	return (0);
-#endif
 }
 
 /*

sys/vm/vm_reserv.c

@@ -549,6 +549,8 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
 	    VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
 	if (m == NULL)
 		return (NULL);
+	KASSERT(vm_phys_domidx(m) == domain,
+	    ("vm_reserv_alloc_contig: Page domain does not match requested."));
 
 	/*
 	 * The allocated physical pages always begin at a reservation
@@ -568,7 +570,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
 		LIST_INSERT_HEAD(&object->rvq, rv, objq);
 		rv->object = object;
 		rv->pindex = first;
-		rv->domain = vm_phys_domidx(m);
+		rv->domain = domain;
 		KASSERT(rv->popcnt == 0,
 		    ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
 		    rv));
@@ -715,7 +717,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
 		LIST_INSERT_HEAD(&object->rvq, rv, objq);
 		rv->object = object;
 		rv->pindex = first;
-		rv->domain = vm_phys_domidx(m);
+		rv->domain = domain;
 		KASSERT(rv->popcnt == 0,
 		    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
 		KASSERT(!rv->inpartpopq,
@@ -734,6 +736,8 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
found:
 	index = VM_RESERV_INDEX(object, pindex);
 	m = &rv->pages[index];
+	KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
+	    ("vm_reserv_alloc_page: Domain mismatch from reservation."));
 	/* Handle vm_page_rename(m, new_object, ...). */
 	if (popmap_is_set(rv->popmap, index))
 		return (NULL);