Very rough first cut at NUMA support for the physical page allocator.  For
now it uses a simple first-touch allocation policy.  This will change in
the future.

- Each architecture indicates the maximum number of supported memory
  domains via a new VM_NDOMAIN parameter in <machine/vmparam.h>.
- Each CPU now has a pc_domain member, read via PCPU_GET(domain), to
  indicate the memory domain the CPU belongs to.  Domain values are
  dense and numbered from 0.
- When a platform supports multiple domains, the default freelist
  (VM_FREELIST_DEFAULT) is split up into N freelists, one for each
  domain.  The MD code is required to populate an array of mem_affinity
  structures.  Each entry in the array defines a range of memory (start
  and end) and a domain for the range.  Multiple entries may be present
  for a single domain.  The list is terminated by an entry where all
  fields are zero.  This array of structures is used to split up
  phys_avail[] regions that fall in VM_FREELIST_DEFAULT into per-domain
  freelists.
- Each memory domain has a separate lookup-array of freelists that is
  used when fulfilling a physical memory allocation.  Right now the
  per-domain freelists are listed in a round-robin order for each
  domain.  In the future a table such as the ACPI SLIT table may be
  used to order the per-domain lookup lists based on the penalty for
  each memory domain relative to a specific domain.  The lookup lists
  may be examined via a new vm.phys.lookup_lists sysctl.
- The first-touch policy is implemented by using PCPU_GET(domain) to
  pick a lookup list when allocating memory.

Reviewed by:	alc
commit a3870a1826
parent 257ee8a425
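To make the MD contract described above concrete, here is a minimal,
hypothetical sketch of how platform code might register affinity
information for a two-domain machine.  The table name, the address
ranges, and the init function are invented for illustration; only the
zero-terminated struct mem_affinity array and the global mem_affinity
pointer come from this commit.

	#include <vm/vm.h>
	#include <vm/vm_phys.h>

	/*
	 * Hypothetical layout: the first 2GB of RAM is local to domain 0,
	 * the next 2GB to domain 1.  The list is terminated by an entry
	 * whose fields are all zero, as the allocator expects.
	 */
	static struct mem_affinity example_affinity[] = {
		{ 0x00000000,  0x80000000,  0 },  /* [0, 2GB) -> domain 0 */
		{ 0x80000000,  0x100000000, 1 },  /* [2GB, 4GB) -> domain 1 */
		{ 0, 0, 0 }                       /* terminator */
	};

	static void
	example_register_affinity(void)
	{

		/* Must run before vm_phys_init() walks phys_avail[]. */
		mem_affinity = example_affinity;
	}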
sys/amd64/include/vmparam.h
@@ -131,6 +131,13 @@
  */
 #define VM_NFREEORDER 13
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Enable superpage reservations: 1 level.
  */
sys/arm/include/vmparam.h
@@ -85,6 +85,13 @@
  */
 #define VM_NFREEORDER 9
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations.
  */
sys/i386/include/vmparam.h
@@ -118,6 +118,13 @@
 #define VM_NFREEORDER 11
 #endif
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Enable superpage reservations: 1 level.
  */
sys/ia64/include/vmparam.h
@@ -119,6 +119,13 @@
  */
 #define VM_NFREEORDER 16
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations.
  */
sys/mips/include/vmparam.h
@@ -117,6 +117,13 @@
 #define KERNBASE ((vm_offset_t)(intptr_t)(int32_t)0x80000000)
 #endif
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations. (not sure if this is right
  * I copied it from ARM)
sys/powerpc/include/vmparam.h
@@ -166,6 +166,13 @@ struct pmap_physseg {
  */
 #define VM_NFREEORDER 11
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations.
  */
sys/sparc64/include/vmparam.h
@@ -120,6 +120,13 @@
  */
 #define VM_NFREEORDER 12
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations.
  */
sys/sun4v/include/vmparam.h
@@ -120,6 +120,13 @@
  */
 #define VM_NFREEORDER 12
 
+/*
+ * Only one memory domain.
+ */
+#ifndef VM_NDOMAIN
+#define VM_NDOMAIN 1
+#endif
+
 /*
  * Disable superpage reservations.
  */
sys/sys/pcpu.h
@@ -179,6 +179,7 @@ struct pcpu {
 	struct device	*pc_device;
 	void		*pc_netisr;	/* netisr SWI cookie */
 	int		pc_dnweight;	/* vm_page_dontneed() */
+	int		pc_domain;	/* Memory domain. */
 
 	/*
 	 * Stuff for read mostly lock
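The diff only adds the field; nothing in this commit wires it up, so how
each CPU learns its domain is left to MD startup code.  A hedged sketch
of what that could look like (the cpu_to_domain[] table is an
assumption, standing in for firmware information such as an ACPI SRAT):

	/*
	 * Hypothetical MD initialization fragment: record the memory
	 * domain of each CPU so the allocator can consult it through
	 * PCPU_GET(domain).  If nothing is set, pc_domain stays 0 and
	 * all CPUs share domain 0.
	 */
	static int cpu_to_domain[MAXCPU];	/* assumed, from firmware */

	static void
	example_set_cpu_domain(struct pcpu *pc)
	{

		pc->pc_domain = cpu_to_domain[pc->pc_cpuid];
	}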
sys/vm/vm_phys.c
@@ -56,6 +56,13 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_phys.h>
 #include <vm/vm_reserv.h>
 
+/*
+ * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
+ * domain.  These extra lists are stored at the end of the regular
+ * free lists starting with VM_NFREELIST.
+ */
+#define VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)
+
 struct vm_freelist {
 	struct pglist pl;
 	int lcnt;
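A quick worked example of the sizing arithmetic, assuming VM_NFREELIST
= 2 with VM_FREELIST_DEFAULT = 0 and VM_FREELIST_ISADMA = 1, and
VM_NDOMAIN = 2 (these constants are illustrative, not from the diff):

	/*
	 * VM_RAW_NFREELIST = VM_NFREELIST + VM_NDOMAIN - 1 = 2 + 2 - 1 = 3
	 *
	 * Raw queue layout:
	 *   [0] VM_FREELIST_DEFAULT (domain 0)
	 *   [1] VM_FREELIST_ISADMA
	 *   [2] default pages for domain 1, remapped in
	 *       _vm_phys_create_seg() below via
	 *       flind = VM_NFREELIST + (domain - 1) = 2 + (1 - 1) = 2
	 */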
@@ -65,15 +72,20 @@ struct vm_phys_seg {
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	vm_page_t	first_page;
+	int		domain;
 	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
 };
 
+struct mem_affinity *mem_affinity;
+
 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
 
 static int vm_phys_nsegs;
 
 static struct vm_freelist
-    vm_phys_free_queues[VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
+    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
+static struct vm_freelist
+(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];
 
 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;
 
@@ -89,6 +101,14 @@ static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
 
+#if VM_NDOMAIN > 1
+static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
+    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
+#endif
+
+static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
+    int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
@@ -157,6 +177,7 @@ sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
 		    (uintmax_t)seg->start);
 		sbuf_printf(&sbuf, "end:       %#jx\n",
 		    (uintmax_t)seg->end);
+		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
 		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
 	}
 	sbuf_finish(&sbuf);
@@ -166,11 +187,40 @@ sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
 	return (error);
 }
 
+#if VM_NDOMAIN > 1
+/*
+ * Outputs the set of free list lookup lists.
+ */
+static int
+sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sbuf;
+	char *cbuf;
+	const int cbufsize = (vm_nfreelists + 1) * VM_NDOMAIN * 81;
+	int domain, error, flind, ndomains;
+
+	ndomains = vm_nfreelists - VM_NFREELIST + 1;
+	cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO);
+	sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN);
+	for (domain = 0; domain < ndomains; domain++) {
+		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
+		for (flind = 0; flind < vm_nfreelists; flind++)
+			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
+			    vm_phys_lookup_lists[domain][flind]);
+	}
+	sbuf_finish(&sbuf);
+	error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf));
+	sbuf_delete(&sbuf);
+	free(cbuf, M_TEMP);
+	return (error);
+}
+#endif
+
 /*
  * Create a physical memory segment.
  */
 static void
-vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
+_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
 {
 	struct vm_phys_seg *seg;
 #ifdef VM_PHYSSEG_SPARSE
@@ -188,14 +238,51 @@ vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
 	seg = &vm_phys_segs[vm_phys_nsegs++];
 	seg->start = start;
 	seg->end = end;
+	seg->domain = domain;
 #ifdef VM_PHYSSEG_SPARSE
 	seg->first_page = &vm_page_array[pages];
 #else
 	seg->first_page = PHYS_TO_VM_PAGE(start);
 #endif
+#if VM_NDOMAIN > 1
+	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
+		flind = VM_NFREELIST + (domain - 1);
+		if (flind >= vm_nfreelists)
+			vm_nfreelists = flind + 1;
+	}
+#endif
 	seg->free_queues = &vm_phys_free_queues[flind];
 }
 
+static void
+vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
+{
+	int i;
+
+	if (mem_affinity == NULL) {
+		_vm_phys_create_seg(start, end, flind, 0);
+		return;
+	}
+
+	for (i = 0;; i++) {
+		if (mem_affinity[i].end == 0)
+			panic("Reached end of affinity info");
+		if (mem_affinity[i].end <= start)
+			continue;
+		if (mem_affinity[i].start > start)
+			panic("No affinity info for start %jx",
+			    (uintmax_t)start);
+		if (mem_affinity[i].end >= end) {
+			_vm_phys_create_seg(start, end, flind,
+			    mem_affinity[i].domain);
+			break;
+		}
+		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
+		    mem_affinity[i].domain);
+		start = mem_affinity[i].end;
+	}
+}
+
 /*
  * Initialize the physical memory allocator.
  */
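Tracing the splitting loop above with the hypothetical two-domain table
from the earlier sketch makes the control flow clearer:

	/*
	 * phys_avail range [0x70000000, 0x90000000), flind = DEFAULT:
	 *
	 *   entry 0 = { 0x00000000, 0x80000000, 0 } covers the start but
	 *   not the end, so the range is split at 0x80000000:
	 *
	 *     _vm_phys_create_seg(0x70000000, 0x80000000, flind, 0);
	 *     start = 0x80000000;
	 *
	 *   entry 1 = { 0x80000000, 0x100000000, 1 } covers the rest:
	 *
	 *     _vm_phys_create_seg(0x80000000, 0x90000000, flind, 1);
	 *     break;
	 *
	 * A range starting below the first entry, or in a gap between
	 * entries, hits one of the two panics instead.
	 */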
@@ -204,6 +291,9 @@ vm_phys_init(void)
 {
 	struct vm_freelist *fl;
 	int flind, i, oind, pind;
+#if VM_NDOMAIN > 1
+	int ndomains, j;
+#endif
 
 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
 #ifdef VM_FREELIST_ISADMA
@@ -246,6 +336,37 @@ vm_phys_init(void)
 			TAILQ_INIT(&fl[oind].pl);
 		}
 	}
+#if VM_NDOMAIN > 1
+	/*
+	 * Build a free list lookup list for each domain.  All of the
+	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
+	 * index in a round-robin order starting with the current
+	 * domain.
+	 */
+	ndomains = vm_nfreelists - VM_NFREELIST + 1;
+	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
+		for (i = 0; i < ndomains; i++)
+			vm_phys_lookup_lists[i][flind] =
+			    &vm_phys_free_queues[flind];
+	for (i = 0; i < ndomains; i++)
+		for (j = 0; j < ndomains; j++) {
+			flind = (i + j) % ndomains;
+			if (flind == 0)
+				flind = VM_FREELIST_DEFAULT;
+			else
+				flind += VM_NFREELIST - 1;
+			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
+			    &vm_phys_free_queues[flind];
+		}
+	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
+	     flind++)
+		for (i = 0; i < ndomains; i++)
+			vm_phys_lookup_lists[i][flind + ndomains - 1] =
+			    &vm_phys_free_queues[flind];
+#else
+	for (flind = 0; flind < vm_nfreelists; flind++)
+		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
+#endif
 }
 
 /*
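Continuing the same two-domain, two-freelist example (VM_FREELIST_DEFAULT
= 0, VM_FREELIST_ISADMA = 1, so vm_nfreelists ends up at 3 and ndomains
= 2, values assumed for illustration), the loops above produce:

	/*
	 * Raw queues: [0] dom0 default  [1] ISADMA  [2] dom1 default
	 *
	 * Lookup lists:
	 *   domain 0: dom0 default, dom1 default, ISADMA
	 *   domain 1: dom1 default, dom0 default, ISADMA
	 *
	 * Each domain tries its own default freelist first, then the
	 * other domains' default lists in round-robin order, and only
	 * then the remaining freelists.  This is the ordering the
	 * vm.phys.lookup_lists sysctl prints.
	 */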
@@ -321,7 +442,7 @@ vm_phys_alloc_freelist_pages(int flind, int pool, int order)
 {
 	struct vm_freelist *fl;
 	struct vm_freelist *alt;
-	int oind, pind;
+	int domain, oind, pind;
 	vm_page_t m;
 
 	KASSERT(flind < VM_NFREELIST,
@@ -330,8 +451,14 @@ vm_phys_alloc_freelist_pages(int flind, int pool, int order)
 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
+
+#if VM_NDOMAIN > 1
+	domain = PCPU_GET(domain);
+#else
+	domain = 0;
+#endif
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	fl = vm_phys_free_queues[flind][pool];
+	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
 	for (oind = order; oind < VM_NFREEORDER; oind++) {
 		m = TAILQ_FIRST(&fl[oind].pl);
 		if (m != NULL) {
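The practical effect of swapping vm_phys_free_queues[] for the
per-domain lookup list, under the same two-domain assumptions:

	/*
	 * A thread running on a CPU whose pc_domain is 1 and asking for
	 * a default-freelist page now searches domain 1's default queue
	 * first and falls back to domain 0's pages only when domain 1
	 * is exhausted: the first-touch policy from the commit message.
	 */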
@@ -351,7 +478,7 @@ vm_phys_alloc_freelist_pages(int flind, int pool, int order)
 	 */
 	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-			alt = vm_phys_free_queues[flind][pind];
+			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
 			m = TAILQ_FIRST(&alt[oind].pl);
 			if (m != NULL) {
 				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
@@ -613,8 +740,13 @@ vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
 	struct vnode *vp;
 	vm_paddr_t pa, pa_last, size;
 	vm_page_t deferred_vdrop_list, m, m_ret;
-	int flind, i, oind, order, pind;
+	int domain, flind, i, oind, order, pind;
 
+#if VM_NDOMAIN > 1
+	domain = PCPU_GET(domain);
+#else
+	domain = 0;
+#endif
 	size = npages << PAGE_SHIFT;
 	KASSERT(size != 0,
 	    ("vm_phys_alloc_contig: size must not be 0"));
@@ -632,7 +764,8 @@ retry:
 	for (flind = 0; flind < vm_nfreelists; flind++) {
 		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-				fl = vm_phys_free_queues[flind][pind];
+				fl = (*vm_phys_lookup_lists[domain][flind])
+				    [pind];
 				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
 					/*
 					 * A free list may contain physical pages
sys/vm/vm_phys.h
@@ -40,6 +40,15 @@
 
 #ifdef _KERNEL
 
+/* Domains must be dense (non-sparse) and zero-based. */
+struct mem_affinity {
+	vm_paddr_t start;
+	vm_paddr_t end;
+	int domain;
+};
+
+extern struct mem_affinity *mem_affinity;
+
 void vm_phys_add_page(vm_paddr_t pa);
 vm_page_t vm_phys_alloc_contig(unsigned long npages,
 	    vm_paddr_t low, vm_paddr_t high,
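A hypothetical consumer of the new interface (not part of this commit),
showing how the zero-terminated table is meant to be walked:

	/*
	 * Map a physical address to its memory domain, defaulting to
	 * domain 0 when no affinity information was registered.
	 */
	static __inline int
	example_paddr_to_domain(vm_paddr_t pa)
	{
		int i;

		if (mem_affinity == NULL)
			return (0);
		for (i = 0; mem_affinity[i].end != 0; i++)
			if (pa >= mem_affinity[i].start &&
			    pa < mem_affinity[i].end)
				return (mem_affinity[i].domain);
		return (0);
	}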