2010-07-27 20:40:46 +00:00
|
|
|
/*-
|
2015-04-23 14:22:20 +00:00
|
|
|
* Copyright (c) 2010 Hudson River Trading LLC
|
2010-07-27 20:40:46 +00:00
|
|
|
* Written by: John H. Baldwin <jhb@FreeBSD.org>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/bus.h>
|
|
|
|
#include <sys/kernel.h>
|
Split the pagequeues per NUMA domain, and split the pagedaemon process
into threads each processing queue in a single domain. The structure
of the pagedaemons and queues is kept intact, most of the changes come
from the need for code to find an owning page queue for given page,
calculated from the segment containing the page.
The tie between NUMA domain and pagedaemon thread/pagequeue split is
rather arbitrary, the multithreaded daemon could be allowed for the
single-domain machines, or one domain might be split into several page
domains, to further increase concurrency.
Right now, each pagedaemon thread tries to reach the global target,
precalculated at the start of the pass. This is not optimal, since it
could cause excessive page deactivation and freeing. The code should
be changed to re-check the global page deficit state in the loop after
some number of iterations.
The pagedaemons reach the quorum before starting the OOM, since one
thread inability to meet the target is normal for split queues. Only
when all pagedaemons fail to produce enough reusable pages, OOM is
started by single selected thread.
Launder is modified to take into account the segments layout with
regard to the region for which cleaning is performed.
Based on the preliminary patch by jeff, sponsored by EMC / Isilon
Storage Division.
Reviewed by: alc
Tested by: pho
Sponsored by: The FreeBSD Foundation
2013-08-07 16:36:38 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
2010-07-27 20:40:46 +00:00
|
|
|
#include <sys/smp.h>
|
Split the pagequeues per NUMA domain, and split the pagedaemon process
into threads each processing queue in a single domain. The structure
of the pagedaemons and queues is kept intact, most of the changes come
from the need for code to find an owning page queue for given page,
calculated from the segment containing the page.
The tie between NUMA domain and pagedaemon thread/pagequeue split is
rather arbitrary, the multithreaded daemon could be allowed for the
single-domain machines, or one domain might be split into several page
domains, to further increase concurrency.
Right now, each pagedaemon thread tries to reach the global target,
precalculated at the start of the pass. This is not optimal, since it
could cause excessive page deactivation and freeing. The code should
be changed to re-check the global page deficit state in the loop after
some number of iterations.
The pagedaemons reach the quorum before starting the OOM, since one
thread inability to meet the target is normal for split queues. Only
when all pagedaemons fail to produce enough reusable pages, OOM is
started by single selected thread.
Launder is modified to take into account the segments layout with
regard to the region for which cleaning is performed.
Based on the preliminary patch by jeff, sponsored by EMC / Isilon
Storage Division.
Reviewed by: alc
Tested by: pho
Sponsored by: The FreeBSD Foundation
2013-08-07 16:36:38 +00:00
|
|
|
#include <sys/vmmeter.h>
|
2010-07-27 20:40:46 +00:00
|
|
|
#include <vm/vm.h>
|
|
|
|
#include <vm/pmap.h>
|
|
|
|
#include <vm/vm_param.h>
|
Split the pagequeues per NUMA domain, and split the pagedaemon process
into threads each processing queue in a single domain. The structure
of the pagedaemons and queues is kept intact, most of the changes come
from the need for code to find an owning page queue for given page,
calculated from the segment containing the page.
The tie between NUMA domain and pagedaemon thread/pagequeue split is
rather arbitrary, the multithreaded daemon could be allowed for the
single-domain machines, or one domain might be split into several page
domains, to further increase concurrency.
Right now, each pagedaemon thread tries to reach the global target,
precalculated at the start of the pass. This is not optimal, since it
could cause excessive page deactivation and freeing. The code should
be changed to re-check the global page deficit state in the loop after
some number of iterations.
The pagedaemons reach the quorum before starting the OOM, since one
thread inability to meet the target is normal for split queues. Only
when all pagedaemons fail to produce enough reusable pages, OOM is
started by single selected thread.
Launder is modified to take into account the segments layout with
regard to the region for which cleaning is performed.
Based on the preliminary patch by jeff, sponsored by EMC / Isilon
Storage Division.
Reviewed by: alc
Tested by: pho
Sponsored by: The FreeBSD Foundation
2013-08-07 16:36:38 +00:00
|
|
|
#include <vm/vm_page.h>
|
2010-07-27 20:40:46 +00:00
|
|
|
#include <vm/vm_phys.h>
|
|
|
|
|
|
|
|
#include <contrib/dev/acpica/include/acpi.h>
|
|
|
|
#include <contrib/dev/acpica/include/actables.h>
|
|
|
|
|
|
|
|
#include <machine/intr_machdep.h>
|
2014-01-23 20:10:22 +00:00
|
|
|
#include <x86/apicvar.h>
|
2010-07-27 20:40:46 +00:00
|
|
|
|
|
|
|
#include <dev/acpica/acpivar.h>
|
|
|
|
|
2013-05-07 22:46:24 +00:00
|
|
|
#if MAXMEMDOM > 1
|
2010-07-27 20:40:46 +00:00
|
|
|
/*
 * Affinity state for one local APIC ID, filled in from the SRAT.  The
 * cpus[] array is indexed directly by APIC ID.
 */
struct cpu_info {
	/*
	 * The flags are unsigned on purpose: a signed one-bit bit-field
	 * can only represent 0 and -1, so storing 1 into it relies on
	 * implementation-defined behaviour.
	 */
	unsigned int enabled:1;		/* SRAT reported this APIC ID as enabled. */
	unsigned int has_memory:1;	/* A memory range shares this CPU's domain. */
	int	domain;			/* Proximity domain; renumbered to a VM domain. */
} cpus[MAX_APIC_ID + 1];
|
|
|
|
|
|
|
|
struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
|
|
|
|
int num_mem;
|
|
|
|
|
|
|
|
static ACPI_TABLE_SRAT *srat;
|
|
|
|
static vm_paddr_t srat_physaddr;
|
|
|
|
|
2014-10-09 05:34:28 +00:00
|
|
|
static int vm_domains[VM_PHYSSEG_MAX];
|
|
|
|
|
2015-05-08 00:56:56 +00:00
|
|
|
static ACPI_TABLE_SLIT *slit;
|
|
|
|
static vm_paddr_t slit_physaddr;
|
|
|
|
static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
|
|
|
|
|
2010-07-27 20:40:46 +00:00
|
|
|
static void srat_walk_table(acpi_subtable_handler *handler, void *arg);
|
|
|
|
|
2015-05-08 00:56:56 +00:00
|
|
|
/*
|
|
|
|
* SLIT parsing.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
|
|
|
slit_parse_table(ACPI_TABLE_SLIT *s)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
int i_domain, j_domain;
|
|
|
|
int offset = 0;
|
|
|
|
uint8_t e;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This maps the SLIT data into the VM-domain centric view.
|
|
|
|
* There may be sparse entries in the PXM namespace, so
|
|
|
|
* remap them to a VM-domain ID and if it doesn't exist,
|
|
|
|
* skip it.
|
|
|
|
*
|
|
|
|
* It should result in a packed 2d array of VM-domain
|
|
|
|
* locality information entries.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (bootverbose)
|
|
|
|
printf("SLIT.Localities: %d\n", (int) s->LocalityCount);
|
|
|
|
for (i = 0; i < s->LocalityCount; i++) {
|
|
|
|
i_domain = acpi_map_pxm_to_vm_domainid(i);
|
|
|
|
if (i_domain < 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (bootverbose)
|
|
|
|
printf("%d: ", i);
|
|
|
|
for (j = 0; j < s->LocalityCount; j++) {
|
|
|
|
j_domain = acpi_map_pxm_to_vm_domainid(j);
|
|
|
|
if (j_domain < 0)
|
|
|
|
continue;
|
|
|
|
e = s->Entry[i * s->LocalityCount + j];
|
|
|
|
if (bootverbose)
|
|
|
|
printf("%d ", (int) e);
|
|
|
|
/* 255 == "no locality information" */
|
|
|
|
if (e == 255)
|
|
|
|
vm_locality_table[offset] = -1;
|
|
|
|
else
|
|
|
|
vm_locality_table[offset] = e;
|
|
|
|
offset++;
|
|
|
|
}
|
|
|
|
if (bootverbose)
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look for an ACPI System Locality Distance Information Table ("SLIT")
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
parse_slit(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (resource_disabled("slit", 0)) {
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
slit_physaddr = acpi_find_table(ACPI_SIG_SLIT);
|
|
|
|
if (slit_physaddr == 0) {
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make a pass over the table to populate the cpus[] and
|
|
|
|
* mem_info[] tables.
|
|
|
|
*/
|
|
|
|
slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT);
|
|
|
|
slit_parse_table(slit);
|
|
|
|
acpi_unmap_table(slit);
|
|
|
|
slit = NULL;
|
|
|
|
|
|
|
|
/* Tell the VM about it! */
|
|
|
|
mem_locality = vm_locality_table;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SRAT parsing.
|
|
|
|
*/
|
|
|
|
|
2011-10-05 16:03:47 +00:00
|
|
|
/*
|
|
|
|
* Returns true if a memory range overlaps with at least one range in
|
|
|
|
* phys_avail[].
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) {
|
|
|
|
if (phys_avail[i + 1] < start)
|
|
|
|
continue;
|
|
|
|
if (phys_avail[i] < end)
|
|
|
|
return (1);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2010-07-27 20:40:46 +00:00
|
|
|
static void
|
|
|
|
srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg)
|
|
|
|
{
|
|
|
|
ACPI_SRAT_CPU_AFFINITY *cpu;
|
|
|
|
ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic;
|
|
|
|
ACPI_SRAT_MEM_AFFINITY *mem;
|
|
|
|
int domain, i, slot;
|
|
|
|
|
|
|
|
switch (entry->Type) {
|
|
|
|
case ACPI_SRAT_TYPE_CPU_AFFINITY:
|
|
|
|
cpu = (ACPI_SRAT_CPU_AFFINITY *)entry;
|
|
|
|
domain = cpu->ProximityDomainLo |
|
|
|
|
cpu->ProximityDomainHi[0] << 8 |
|
|
|
|
cpu->ProximityDomainHi[1] << 16 |
|
|
|
|
cpu->ProximityDomainHi[2] << 24;
|
|
|
|
if (bootverbose)
|
|
|
|
printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
|
|
|
|
cpu->ApicId, domain,
|
|
|
|
(cpu->Flags & ACPI_SRAT_CPU_ENABLED) ?
|
|
|
|
"enabled" : "disabled");
|
|
|
|
if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
|
|
|
|
break;
|
|
|
|
KASSERT(!cpus[cpu->ApicId].enabled,
|
|
|
|
("Duplicate local APIC ID %u", cpu->ApicId));
|
|
|
|
cpus[cpu->ApicId].domain = domain;
|
|
|
|
cpus[cpu->ApicId].enabled = 1;
|
|
|
|
break;
|
|
|
|
case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
|
|
|
|
x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry;
|
|
|
|
if (bootverbose)
|
|
|
|
printf("SRAT: Found CPU APIC ID %u domain %d: %s\n",
|
|
|
|
x2apic->ApicId, x2apic->ProximityDomain,
|
|
|
|
(x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ?
|
|
|
|
"enabled" : "disabled");
|
|
|
|
if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED))
|
|
|
|
break;
|
|
|
|
KASSERT(!cpus[x2apic->ApicId].enabled,
|
|
|
|
("Duplicate local APIC ID %u", x2apic->ApicId));
|
|
|
|
cpus[x2apic->ApicId].domain = x2apic->ProximityDomain;
|
|
|
|
cpus[x2apic->ApicId].enabled = 1;
|
|
|
|
break;
|
|
|
|
case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
|
|
|
|
mem = (ACPI_SRAT_MEM_AFFINITY *)entry;
|
|
|
|
if (bootverbose)
|
|
|
|
printf(
|
|
|
|
"SRAT: Found memory domain %d addr %jx len %jx: %s\n",
|
|
|
|
mem->ProximityDomain, (uintmax_t)mem->BaseAddress,
|
|
|
|
(uintmax_t)mem->Length,
|
|
|
|
(mem->Flags & ACPI_SRAT_MEM_ENABLED) ?
|
|
|
|
"enabled" : "disabled");
|
|
|
|
if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
|
|
|
|
break;
|
2011-10-05 16:03:47 +00:00
|
|
|
if (!overlaps_phys_avail(mem->BaseAddress,
|
|
|
|
mem->BaseAddress + mem->Length)) {
|
|
|
|
printf("SRAT: Ignoring memory at addr %jx\n",
|
|
|
|
(uintmax_t)mem->BaseAddress);
|
|
|
|
break;
|
|
|
|
}
|
2010-07-27 20:40:46 +00:00
|
|
|
if (num_mem == VM_PHYSSEG_MAX) {
|
|
|
|
printf("SRAT: Too many memory regions\n");
|
|
|
|
*(int *)arg = ENXIO;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
slot = num_mem;
|
|
|
|
for (i = 0; i < num_mem; i++) {
|
|
|
|
if (mem_info[i].end <= mem->BaseAddress)
|
|
|
|
continue;
|
|
|
|
if (mem_info[i].start <
|
|
|
|
(mem->BaseAddress + mem->Length)) {
|
|
|
|
printf("SRAT: Overlapping memory entries\n");
|
|
|
|
*(int *)arg = ENXIO;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
slot = i;
|
|
|
|
}
|
|
|
|
for (i = num_mem; i > slot; i--)
|
|
|
|
mem_info[i] = mem_info[i - 1];
|
|
|
|
mem_info[slot].start = mem->BaseAddress;
|
|
|
|
mem_info[slot].end = mem->BaseAddress + mem->Length;
|
|
|
|
mem_info[slot].domain = mem->ProximityDomain;
|
|
|
|
num_mem++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ensure each memory domain has at least one CPU and that each CPU
|
|
|
|
* has at least one memory domain.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
check_domains(void)
|
|
|
|
{
|
|
|
|
int found, i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < num_mem; i++) {
|
|
|
|
found = 0;
|
|
|
|
for (j = 0; j <= MAX_APIC_ID; j++)
|
2010-07-29 17:37:35 +00:00
|
|
|
if (cpus[j].enabled &&
|
|
|
|
cpus[j].domain == mem_info[i].domain) {
|
2010-07-27 20:40:46 +00:00
|
|
|
cpus[j].has_memory = 1;
|
|
|
|
found++;
|
|
|
|
}
|
|
|
|
if (!found) {
|
|
|
|
printf("SRAT: No CPU found for memory domain %d\n",
|
|
|
|
mem_info[i].domain);
|
|
|
|
return (ENXIO);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for (i = 0; i <= MAX_APIC_ID; i++)
|
|
|
|
if (cpus[i].enabled && !cpus[i].has_memory) {
|
|
|
|
printf("SRAT: No memory found for CPU %d\n", i);
|
|
|
|
return (ENXIO);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that the SRAT memory regions cover all of the regions in
|
|
|
|
* phys_avail[].
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
check_phys_avail(void)
|
|
|
|
{
|
|
|
|
vm_paddr_t address;
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
/* j is the current offset into phys_avail[]. */
|
|
|
|
address = phys_avail[0];
|
|
|
|
j = 0;
|
|
|
|
for (i = 0; i < num_mem; i++) {
|
|
|
|
/*
|
|
|
|
* Consume as many phys_avail[] entries as fit in this
|
|
|
|
* region.
|
|
|
|
*/
|
|
|
|
while (address >= mem_info[i].start &&
|
|
|
|
address <= mem_info[i].end) {
|
|
|
|
/*
|
|
|
|
* If we cover the rest of this phys_avail[] entry,
|
|
|
|
* advance to the next entry.
|
|
|
|
*/
|
|
|
|
if (phys_avail[j + 1] <= mem_info[i].end) {
|
|
|
|
j += 2;
|
|
|
|
if (phys_avail[j] == 0 &&
|
|
|
|
phys_avail[j + 1] == 0) {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
address = phys_avail[j];
|
|
|
|
} else
|
|
|
|
address = mem_info[i].end + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
printf("SRAT: No memory region found for %jx - %jx\n",
|
|
|
|
(uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]);
|
|
|
|
return (ENXIO);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Renumber the memory domains to be compact and zero-based if not
|
2012-01-03 20:53:58 +00:00
|
|
|
* already. Returns an error if there are too many domains.
|
2010-07-27 20:40:46 +00:00
|
|
|
*/
|
2012-01-03 20:53:58 +00:00
|
|
|
static int
|
2010-07-27 20:40:46 +00:00
|
|
|
renumber_domains(void)
|
|
|
|
{
|
2013-05-13 15:40:51 +00:00
|
|
|
int i, j, slot;
|
2010-07-27 20:40:46 +00:00
|
|
|
|
|
|
|
/* Enumerate all the domains. */
|
2013-05-13 15:40:51 +00:00
|
|
|
vm_ndomains = 0;
|
2010-07-27 20:40:46 +00:00
|
|
|
for (i = 0; i < num_mem; i++) {
|
|
|
|
/* See if this domain is already known. */
|
2013-05-13 15:40:51 +00:00
|
|
|
for (j = 0; j < vm_ndomains; j++) {
|
2014-10-09 05:34:28 +00:00
|
|
|
if (vm_domains[j] >= mem_info[i].domain)
|
2010-07-27 20:40:46 +00:00
|
|
|
break;
|
|
|
|
}
|
2014-10-09 05:34:28 +00:00
|
|
|
if (j < vm_ndomains && vm_domains[j] == mem_info[i].domain)
|
2010-07-27 20:40:46 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Insert the new domain at slot 'j'. */
|
|
|
|
slot = j;
|
2013-05-13 15:40:51 +00:00
|
|
|
for (j = vm_ndomains; j > slot; j--)
|
2014-10-09 05:34:28 +00:00
|
|
|
vm_domains[j] = vm_domains[j - 1];
|
|
|
|
vm_domains[slot] = mem_info[i].domain;
|
2013-05-13 15:40:51 +00:00
|
|
|
vm_ndomains++;
|
|
|
|
if (vm_ndomains > MAXMEMDOM) {
|
|
|
|
vm_ndomains = 1;
|
2012-01-03 20:53:58 +00:00
|
|
|
printf("SRAT: Too many memory domains\n");
|
|
|
|
return (EFBIG);
|
|
|
|
}
|
2010-07-27 20:40:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Renumber each domain to its index in the sorted 'domains' list. */
|
2013-05-13 15:40:51 +00:00
|
|
|
for (i = 0; i < vm_ndomains; i++) {
|
2010-07-27 20:40:46 +00:00
|
|
|
/*
|
|
|
|
* If the domain is already the right value, no need
|
|
|
|
* to renumber.
|
|
|
|
*/
|
2014-10-09 05:34:28 +00:00
|
|
|
if (vm_domains[i] == i)
|
2010-07-27 20:40:46 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Walk the cpu[] and mem_info[] arrays to renumber. */
|
|
|
|
for (j = 0; j < num_mem; j++)
|
2014-10-09 05:34:28 +00:00
|
|
|
if (mem_info[j].domain == vm_domains[i])
|
2010-07-27 20:40:46 +00:00
|
|
|
mem_info[j].domain = i;
|
|
|
|
for (j = 0; j <= MAX_APIC_ID; j++)
|
2014-10-09 05:34:28 +00:00
|
|
|
if (cpus[j].enabled && cpus[j].domain == vm_domains[i])
|
2010-07-27 20:40:46 +00:00
|
|
|
cpus[j].domain = i;
|
|
|
|
}
|
2013-05-13 15:40:51 +00:00
|
|
|
KASSERT(vm_ndomains > 0,
|
|
|
|
("renumber_domains: invalid final vm_ndomains setup"));
|
|
|
|
|
2012-01-03 20:53:58 +00:00
|
|
|
return (0);
|
2010-07-27 20:40:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look for an ACPI System Resource Affinity Table ("SRAT")
|
|
|
|
*/
|
2015-05-08 00:56:56 +00:00
|
|
|
static int
|
|
|
|
parse_srat(void)
|
2010-07-27 20:40:46 +00:00
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (resource_disabled("srat", 0))
|
2015-05-08 00:56:56 +00:00
|
|
|
return (-1);
|
2010-07-27 20:40:46 +00:00
|
|
|
|
|
|
|
srat_physaddr = acpi_find_table(ACPI_SIG_SRAT);
|
|
|
|
if (srat_physaddr == 0)
|
2015-05-08 00:56:56 +00:00
|
|
|
return (-1);
|
2010-07-27 20:40:46 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Make a pass over the table to populate the cpus[] and
|
|
|
|
* mem_info[] tables.
|
|
|
|
*/
|
|
|
|
srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT);
|
|
|
|
error = 0;
|
|
|
|
srat_walk_table(srat_parse_entry, &error);
|
|
|
|
acpi_unmap_table(srat);
|
|
|
|
srat = NULL;
|
2012-01-03 20:53:58 +00:00
|
|
|
if (error || check_domains() != 0 || check_phys_avail() != 0 ||
|
|
|
|
renumber_domains() != 0) {
|
2010-07-27 20:40:46 +00:00
|
|
|
srat_physaddr = 0;
|
2015-05-08 00:56:56 +00:00
|
|
|
return (-1);
|
2010-07-27 20:40:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Point vm_phys at our memory affinity table. */
|
|
|
|
mem_affinity = mem_info;
|
2015-05-08 00:56:56 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
init_mem_locality(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/*
|
2015-05-15 21:33:19 +00:00
|
|
|
* For now, assume -1 == "no locality information for
|
2015-05-08 00:56:56 +00:00
|
|
|
* this pairing.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++)
|
|
|
|
vm_locality_table[i] = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
parse_acpi_tables(void *dummy)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (parse_srat() < 0)
|
|
|
|
return;
|
|
|
|
init_mem_locality();
|
|
|
|
(void) parse_slit();
|
2010-07-27 20:40:46 +00:00
|
|
|
}
|
2015-05-08 00:56:56 +00:00
|
|
|
SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables,
|
|
|
|
NULL);
|
2010-07-27 20:40:46 +00:00
|
|
|
|
|
|
|
static void
|
|
|
|
srat_walk_table(acpi_subtable_handler *handler, void *arg)
|
|
|
|
{
|
|
|
|
|
|
|
|
acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length,
|
|
|
|
handler, arg);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-01-08 15:53:13 +00:00
|
|
|
* Setup per-CPU domain IDs.
|
2010-07-27 20:40:46 +00:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
srat_set_cpus(void *dummy)
|
|
|
|
{
|
|
|
|
struct cpu_info *cpu;
|
|
|
|
struct pcpu *pc;
|
|
|
|
u_int i;
|
|
|
|
|
|
|
|
if (srat_physaddr == 0)
|
|
|
|
return;
|
|
|
|
for (i = 0; i < MAXCPU; i++) {
|
|
|
|
if (CPU_ABSENT(i))
|
|
|
|
continue;
|
|
|
|
pc = pcpu_find(i);
|
|
|
|
KASSERT(pc != NULL, ("no pcpu data for CPU %u", i));
|
|
|
|
cpu = &cpus[pc->pc_apic_id];
|
|
|
|
if (!cpu->enabled)
|
|
|
|
panic("SRAT: CPU with APIC ID %u is not known",
|
|
|
|
pc->pc_apic_id);
|
|
|
|
pc->pc_domain = cpu->domain;
|
2015-01-08 15:53:13 +00:00
|
|
|
CPU_SET(i, &cpuset_domain[cpu->domain]);
|
2010-07-27 20:40:46 +00:00
|
|
|
if (bootverbose)
|
|
|
|
printf("SRAT: CPU %u has memory domain %d\n", i,
|
|
|
|
cpu->domain);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL);
|
2014-10-09 05:34:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Map a _PXM value to a VM domain ID.
|
|
|
|
*
|
|
|
|
* Returns the domain ID, or -1 if no domain ID was found.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
acpi_map_pxm_to_vm_domainid(int pxm)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < vm_ndomains; i++) {
|
|
|
|
if (vm_domains[i] == pxm)
|
|
|
|
return (i);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
2013-05-07 22:46:24 +00:00
|
|
|
#endif /* MAXMEMDOM > 1 */
|