Add NUMA support to powerpc
Summary: Initial NUMA support: - associate CPU with domain - associate memory ranges with domain - identify domain for devices - limit device interrupt binding to appropriate domain - Additionally fixes a bug in the setting of Maxmem which led to only memory attached to the first socket being enabled for DMA. A pmap variant can opt in to NUMA support by calling `numa_mem_regions` at the end of pmap_bootstrap, registering the corresponding ranges with the VM. This yields a ~20% improvement in build times of llvm on dual socket POWER9 over non-NUMA. Original patch by mmacy. Differential Revision: https://reviews.freebsd.org/D17933
This commit is contained in:
parent
e9aae3496e
commit
49d9a59783
@ -146,8 +146,9 @@ extern void *slbtrap, *slbtrapend;
|
||||
*/
|
||||
static struct mem_region *regions;
|
||||
static struct mem_region *pregions;
|
||||
static struct numa_mem_region *numa_pregions;
|
||||
static u_int phys_avail_count;
|
||||
static int regions_sz, pregions_sz;
|
||||
static int regions_sz, pregions_sz, numapregions_sz;
|
||||
|
||||
extern void bs_remap_earlyboot(void);
|
||||
|
||||
@ -1048,6 +1049,8 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend
|
||||
PMAP_UNLOCK(kernel_pmap);
|
||||
}
|
||||
}
|
||||
|
||||
numa_mem_regions(&numa_pregions, &numapregions_sz);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -35,6 +35,7 @@ options POWERNV #Non-virtualized OpenPOWER systems
|
||||
|
||||
options FDT #Flattened Device Tree
|
||||
options SCHED_ULE #ULE scheduler
|
||||
options NUMA #Non-Uniform Memory Architecture support
|
||||
options PREEMPTION #Enable kernel thread preemption
|
||||
options VIMAGE # Subsystem virtualization, e.g. VNET
|
||||
options INET #InterNETworking
|
||||
|
@ -54,7 +54,7 @@ u_int powerpc_get_irq(uint32_t, u_int);
|
||||
void powerpc_dispatch_intr(u_int, struct trapframe *);
|
||||
int powerpc_enable_intr(void);
|
||||
int powerpc_setup_intr(const char *, u_int, driver_filter_t, driver_intr_t,
|
||||
void *, enum intr_type, void **);
|
||||
void *, enum intr_type, void **, int);
|
||||
int powerpc_teardown_intr(void *);
|
||||
int powerpc_bind_intr(u_int irq, u_char cpu);
|
||||
int powerpc_config_intr(int, enum intr_trigger, enum intr_polarity);
|
||||
|
@ -47,7 +47,11 @@ boolean_t OF_bootstrap(void);
|
||||
void OF_reboot(void);
|
||||
|
||||
void ofw_mem_regions(struct mem_region *, int *, struct mem_region *, int *);
|
||||
void ofw_numa_mem_regions(struct numa_mem_region *, int *);
|
||||
void ofw_quiesce(void); /* Must be called before VM is up! */
|
||||
void ofw_save_trap_vec(char *);
|
||||
int ofw_pcibus_get_domain(device_t dev, device_t child, int *domain);
|
||||
int ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op,
|
||||
size_t setsize, cpuset_t *cpuset);
|
||||
|
||||
#endif /* _MACHINE_OFW_MACHDEP_H_ */
|
||||
|
@ -82,7 +82,7 @@
|
||||
#endif /* SMP || KLD_MODULE */
|
||||
|
||||
#ifndef MAXMEMDOM
|
||||
#define MAXMEMDOM 1
|
||||
#define MAXMEMDOM 8
|
||||
#endif
|
||||
|
||||
#define ALIGNBYTES _ALIGNBYTES
|
||||
|
@ -45,9 +45,16 @@ struct mem_region {
|
||||
uint64_t mr_size;
|
||||
};
|
||||
|
||||
/*
 * A physical memory range together with the memory domain it belongs to.
 */
struct numa_mem_region {
|
||||
uint64_t mr_start;	/* first address of the range */
|
||||
uint64_t mr_size;	/* length of the range; end is mr_start + mr_size */
|
||||
uint64_t mr_domain;	/* memory domain number of the range */
|
||||
};
|
||||
|
||||
/* Documentation for these functions is in platform_if.m */
|
||||
|
||||
void mem_regions(struct mem_region **, int *, struct mem_region **, int *);
|
||||
void numa_mem_regions(struct numa_mem_region **, int *);
|
||||
vm_offset_t platform_real_maxaddr(void);
|
||||
|
||||
u_long platform_timebase_freq(struct cpuref *);
|
||||
|
@ -52,6 +52,7 @@ void ipi_selected(cpuset_t cpus, int ipi);
|
||||
struct cpuref {
|
||||
uintptr_t cr_hwref;
|
||||
u_int cr_cpuid;
|
||||
u_int cr_domain;
|
||||
};
|
||||
|
||||
void pmap_cpu_bootstrap(int);
|
||||
|
@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_phys.h>
|
||||
|
||||
#include <machine/bus.h>
|
||||
#include <machine/cpu.h>
|
||||
@ -222,9 +223,57 @@ parse_ofw_memory(phandle_t node, const char *prop, struct mem_region *output)
|
||||
|
||||
j++;
|
||||
}
|
||||
sz = j*sizeof(output[0]);
|
||||
|
||||
return (sz);
|
||||
return (j);
|
||||
}
|
||||
|
||||
static int
|
||||
parse_numa_ofw_memory(phandle_t node, const char *prop,
|
||||
struct numa_mem_region *output)
|
||||
{
|
||||
cell_t address_cells, size_cells;
|
||||
cell_t OFmem[4 * PHYS_AVAIL_SZ];
|
||||
int sz, i, j;
|
||||
phandle_t phandle;
|
||||
|
||||
sz = 0;
|
||||
|
||||
/*
|
||||
* Get #address-cells from root node, defaulting to 1 if it cannot
|
||||
* be found.
|
||||
*/
|
||||
phandle = OF_finddevice("/");
|
||||
if (OF_getencprop(phandle, "#address-cells", &address_cells,
|
||||
sizeof(address_cells)) < (ssize_t)sizeof(address_cells))
|
||||
address_cells = 1;
|
||||
if (OF_getencprop(phandle, "#size-cells", &size_cells,
|
||||
sizeof(size_cells)) < (ssize_t)sizeof(size_cells))
|
||||
size_cells = 1;
|
||||
|
||||
/*
|
||||
* Get memory.
|
||||
*/
|
||||
if (node == -1 || (sz = OF_getencprop(node, prop,
|
||||
OFmem, sizeof(OFmem))) <= 0)
|
||||
panic("Physical memory map not found");
|
||||
|
||||
i = 0;
|
||||
j = 0;
|
||||
while (i < sz/sizeof(cell_t)) {
|
||||
output[j].mr_start = OFmem[i++];
|
||||
if (address_cells == 2) {
|
||||
output[j].mr_start <<= 32;
|
||||
output[j].mr_start += OFmem[i++];
|
||||
}
|
||||
output[j].mr_size = OFmem[i++];
|
||||
if (size_cells == 2) {
|
||||
output[j].mr_size <<= 32;
|
||||
output[j].mr_size += OFmem[i++];
|
||||
}
|
||||
j++;
|
||||
}
|
||||
|
||||
return (j);
|
||||
}
|
||||
|
||||
#ifdef FDT
|
||||
@ -402,6 +451,51 @@ excise_fdt_reserved(struct mem_region *avail, int asz)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is called during powerpc_init, before the system is really initialized.
|
||||
* It shall provide the total and the available regions of RAM.
|
||||
* The available regions need not take the kernel into account.
|
||||
*/
|
||||
void
|
||||
ofw_numa_mem_regions(struct numa_mem_region *memp, int *memsz)
|
||||
{
|
||||
phandle_t phandle;
|
||||
int res, count, msz;
|
||||
char name[31];
|
||||
cell_t associativity[5];
|
||||
struct numa_mem_region *curmemp;
|
||||
|
||||
msz = 0;
|
||||
/*
|
||||
* Get memory from all the /memory nodes.
|
||||
*/
|
||||
for (phandle = OF_child(OF_peer(0)); phandle != 0;
|
||||
phandle = OF_peer(phandle)) {
|
||||
if (OF_getprop(phandle, "name", name, sizeof(name)) <= 0)
|
||||
continue;
|
||||
if (strncmp(name, "memory@", strlen("memory@")) != 0)
|
||||
continue;
|
||||
|
||||
count = parse_numa_ofw_memory(phandle, "reg", &memp[msz]);
|
||||
if (count == 0)
|
||||
continue;
|
||||
curmemp = &memp[msz];
|
||||
res = OF_getproplen(phandle, "ibm,associativity");
|
||||
if (res <= 0)
|
||||
continue;
|
||||
MPASS(count == 1);
|
||||
OF_getencprop(phandle, "ibm,associativity",
|
||||
associativity, res);
|
||||
curmemp->mr_domain = associativity[3] - 1;
|
||||
if (bootverbose)
|
||||
printf("%s %#jx-%#jx domain(%ju)\n",
|
||||
name, (uintmax_t)curmemp->mr_start,
|
||||
(uintmax_t)curmemp->mr_start + curmemp->mr_size,
|
||||
(uintmax_t)curmemp->mr_domain);
|
||||
msz += count;
|
||||
}
|
||||
*memsz = msz;
|
||||
}
|
||||
/*
|
||||
* This is called during powerpc_init, before the system is really initialized.
|
||||
* It shall provide the total and the available regions of RAM.
|
||||
@ -430,7 +524,7 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
|
||||
continue;
|
||||
|
||||
res = parse_ofw_memory(phandle, "reg", &memp[msz]);
|
||||
msz += res/sizeof(struct mem_region);
|
||||
msz += res;
|
||||
|
||||
/*
|
||||
* On POWER9 Systems we might have both linux,usable-memory and
|
||||
@ -446,7 +540,7 @@ ofw_mem_regions(struct mem_region *memp, int *memsz,
|
||||
&availp[asz]);
|
||||
else
|
||||
res = parse_ofw_memory(phandle, "reg", &availp[asz]);
|
||||
asz += res/sizeof(struct mem_region);
|
||||
asz += res;
|
||||
}
|
||||
|
||||
#ifdef FDT
|
||||
|
@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/pciio.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <dev/ofw/ofw_bus.h>
|
||||
#include <dev/ofw/ofw_bus_subr.h>
|
||||
@ -80,6 +81,8 @@ static device_method_t ofw_pcibus_methods[] = {
|
||||
DEVMETHOD(bus_child_deleted, ofw_pcibus_child_deleted),
|
||||
DEVMETHOD(bus_child_pnpinfo_str, ofw_pcibus_child_pnpinfo_str_method),
|
||||
DEVMETHOD(bus_rescan, bus_null_rescan),
|
||||
DEVMETHOD(bus_get_cpus, ofw_pcibus_get_cpus),
|
||||
DEVMETHOD(bus_get_domain, ofw_pcibus_get_domain),
|
||||
|
||||
/* PCI interface */
|
||||
DEVMETHOD(pci_alloc_devinfo, ofw_pcibus_alloc_devinfo),
|
||||
@ -382,3 +385,76 @@ ofw_pcibus_get_devinfo(device_t bus, device_t dev)
|
||||
return (&dinfo->opd_obdinfo);
|
||||
}
|
||||
|
||||
static int
|
||||
ofw_pcibus_parse_associativity(device_t dev, int *domain)
|
||||
{
|
||||
phandle_t node;
|
||||
cell_t associativity[5];
|
||||
int res;
|
||||
|
||||
if ((node = ofw_bus_get_node(dev)) == -1) {
|
||||
device_printf(dev, "no ofw node found\n");
|
||||
return (ENXIO);
|
||||
}
|
||||
res = OF_getproplen(node, "ibm,associativity");
|
||||
if (res <= 0)
|
||||
return (ENXIO);
|
||||
OF_getencprop(node, "ibm,associativity",
|
||||
associativity, res);
|
||||
|
||||
*domain = associativity[3] - 1;
|
||||
if (bootverbose)
|
||||
device_printf(dev, "domain(%d)\n", *domain);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
|
||||
cpuset_t *cpuset)
|
||||
{
|
||||
int d, error;
|
||||
|
||||
error = ofw_pcibus_parse_associativity(child, &d);
|
||||
if (error)
|
||||
return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
|
||||
|
||||
switch (op) {
|
||||
case LOCAL_CPUS:
|
||||
if (setsize != sizeof(cpuset_t))
|
||||
return (EINVAL);
|
||||
*cpuset = cpuset_domain[d];
|
||||
return (0);
|
||||
case INTR_CPUS:
|
||||
error = bus_generic_get_cpus(dev, child, op, setsize, cpuset);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
if (setsize != sizeof(cpuset_t))
|
||||
return (EINVAL);
|
||||
CPU_AND(cpuset, &cpuset_domain[d]);
|
||||
return (0);
|
||||
default:
|
||||
return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch the NUMA domain for the given device 'dev'.
|
||||
*
|
||||
* If a device has a _PXM method, map that to a NUMA domain.
|
||||
* Otherwise, pass the request up to the parent.
|
||||
* If there's no matching domain or the domain cannot be
|
||||
* determined, return ENOENT.
|
||||
*/
|
||||
int
|
||||
ofw_pcibus_get_domain(device_t dev, device_t child, int *domain)
|
||||
{
|
||||
int d, error;
|
||||
|
||||
error = ofw_pcibus_parse_associativity(child, &d);
|
||||
/* No ofw node; go up a level */
|
||||
if (error)
|
||||
return (bus_generic_get_domain(dev, child, domain));
|
||||
*domain = d;
|
||||
return (0);
|
||||
}
|
||||
|
@ -149,6 +149,8 @@ static device_method_t opalpci_methods[] = {
|
||||
|
||||
/* Bus interface */
|
||||
DEVMETHOD(bus_get_dma_tag, opalpci_get_dma_tag),
|
||||
DEVMETHOD(bus_get_cpus, ofw_pcibus_get_cpus),
|
||||
DEVMETHOD(bus_get_domain, ofw_pcibus_get_domain),
|
||||
|
||||
DEVMETHOD_END
|
||||
};
|
||||
@ -367,7 +369,7 @@ opalpci_attach(device_t dev)
|
||||
tce_size = max_tce_size(dev);
|
||||
maxmem = roundup2(powerpc_ptob(Maxmem), tce_size);
|
||||
entries = round_pow2(maxmem / tce_size);
|
||||
tce_tbl_size = max(entries * sizeof(uint64_t), 4096);
|
||||
tce_tbl_size = MAX(entries * sizeof(uint64_t), 4096);
|
||||
if (entries > OPAL_PCI_TCE_MAX_ENTRIES)
|
||||
panic("POWERNV supports only %jdGB of memory space\n",
|
||||
(uintmax_t)((OPAL_PCI_TCE_MAX_ENTRIES * tce_size) >> 30));
|
||||
|
@ -65,6 +65,7 @@ static int powernv_probe(platform_t);
|
||||
static int powernv_attach(platform_t);
|
||||
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
|
||||
struct mem_region *avail, int *availsz);
|
||||
static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
|
||||
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
|
||||
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
|
||||
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
|
||||
@ -83,6 +84,7 @@ static platform_method_t powernv_methods[] = {
|
||||
PLATFORMMETHOD(platform_probe, powernv_probe),
|
||||
PLATFORMMETHOD(platform_attach, powernv_attach),
|
||||
PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions),
|
||||
PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),
|
||||
PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq),
|
||||
|
||||
PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init),
|
||||
@ -250,6 +252,13 @@ powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
|
||||
ofw_mem_regions(phys, physsz, avail, availsz);
|
||||
}
|
||||
|
||||
/*
 * platform_numa_mem_regions method for PowerNV: forwards to
 * ofw_numa_mem_regions(), which fills 'phys' and '*physsz'.  The platform
 * handle is not used.
 */
static void
|
||||
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
|
||||
{
|
||||
|
||||
ofw_numa_mem_regions(phys, physsz);
|
||||
}
|
||||
|
||||
static u_long
|
||||
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
|
||||
{
|
||||
@ -313,15 +322,13 @@ powernv_cpuref_init(void)
|
||||
if (res > 0 && strcmp(buf, "cpu") == 0) {
|
||||
res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
|
||||
if (res > 0) {
|
||||
|
||||
|
||||
OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
|
||||
interrupt_servers, res);
|
||||
|
||||
for (a = 0; a < res/sizeof(cell_t); a++) {
|
||||
tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
|
||||
tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
|
||||
|
||||
tmp_cpuref[tmp_cpuref_cnt].cr_domain = interrupt_servers[a] >> 11;
|
||||
if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
|
||||
bsp = tmp_cpuref_cnt;
|
||||
|
||||
@ -335,11 +342,13 @@ powernv_cpuref_init(void)
|
||||
for (a = bsp; a < tmp_cpuref_cnt; a++) {
|
||||
platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
|
||||
platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
|
||||
platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
|
||||
platform_cpuref_cnt++;
|
||||
}
|
||||
for (a = 0; a < bsp; a++) {
|
||||
platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
|
||||
platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
|
||||
platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
|
||||
platform_cpuref_cnt++;
|
||||
}
|
||||
|
||||
@ -356,6 +365,7 @@ powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
|
||||
|
||||
cpuref->cr_cpuid = 0;
|
||||
cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
|
||||
cpuref->cr_domain = platform_cpuref[0].cr_domain;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -374,6 +384,7 @@ powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
|
||||
|
||||
cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
|
||||
cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
|
||||
cpuref->cr_domain = platform_cpuref[id].cr_domain;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -384,6 +395,7 @@ powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
|
||||
|
||||
cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
|
||||
cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
|
||||
cpuref->cr_domain = platform_cpuref[0].cr_domain;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -97,16 +97,17 @@ struct powerpc_intr {
|
||||
struct intr_event *event;
|
||||
long *cntp;
|
||||
void *priv; /* PIC-private data */
|
||||
u_int irq;
|
||||
device_t pic;
|
||||
u_int irq;
|
||||
u_int intline;
|
||||
u_int vector;
|
||||
u_int cntindex;
|
||||
cpuset_t cpu;
|
||||
enum intr_trigger trig;
|
||||
enum intr_polarity pol;
|
||||
int fwcode;
|
||||
int ipi;
|
||||
int pi_domain;
|
||||
enum intr_trigger trig;
|
||||
enum intr_polarity pol;
|
||||
cpuset_t pi_cpuset;
|
||||
};
|
||||
|
||||
struct pic {
|
||||
@ -203,7 +204,7 @@ smp_intr_init(void *dummy __unused)
|
||||
for (vector = 0; vector < nvectors; vector++) {
|
||||
i = powerpc_intrs[vector];
|
||||
if (i != NULL && i->event != NULL && i->pic == root_pic)
|
||||
PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
|
||||
PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
|
||||
}
|
||||
}
|
||||
SYSINIT(smp_intr_init, SI_SUB_SMP, SI_ORDER_ANY, smp_intr_init, NULL);
|
||||
@ -256,9 +257,9 @@ intr_lookup(u_int irq)
|
||||
i->ipi = 0;
|
||||
|
||||
#ifdef SMP
|
||||
i->cpu = all_cpus;
|
||||
i->pi_cpuset = all_cpus;
|
||||
#else
|
||||
CPU_SETOF(0, &i->cpu);
|
||||
CPU_SETOF(0, &i->pi_cpuset);
|
||||
#endif
|
||||
|
||||
for (vector = 0; vector < num_io_irqs && vector <= nvectors;
|
||||
@ -347,12 +348,12 @@ powerpc_assign_intr_cpu(void *arg, int cpu)
|
||||
struct powerpc_intr *i = arg;
|
||||
|
||||
if (cpu == NOCPU)
|
||||
i->cpu = all_cpus;
|
||||
i->pi_cpuset = all_cpus;
|
||||
else
|
||||
CPU_SETOF(cpu, &i->cpu);
|
||||
CPU_SETOF(cpu, &i->pi_cpuset);
|
||||
|
||||
if (!cold && i->pic != NULL && i->pic == root_pic)
|
||||
PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
|
||||
PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
|
||||
|
||||
return (0);
|
||||
#else
|
||||
@ -469,7 +470,8 @@ powerpc_enable_intr(void)
|
||||
error = powerpc_setup_intr("IPI",
|
||||
MAP_IRQ(piclist[n].node, piclist[n].irqs),
|
||||
powerpc_ipi_handler, NULL, NULL,
|
||||
INTR_TYPE_MISC | INTR_EXCL, &ipi_cookie);
|
||||
INTR_TYPE_MISC | INTR_EXCL, &ipi_cookie,
|
||||
0 /* domain XXX */);
|
||||
if (error) {
|
||||
printf("unable to setup IPI handler\n");
|
||||
return (error);
|
||||
@ -512,7 +514,8 @@ powerpc_enable_intr(void)
|
||||
|
||||
int
|
||||
powerpc_setup_intr(const char *name, u_int irq, driver_filter_t filter,
|
||||
driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
|
||||
driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep,
|
||||
int domain)
|
||||
{
|
||||
struct powerpc_intr *i;
|
||||
int error, enable = 0;
|
||||
@ -533,7 +536,13 @@ powerpc_setup_intr(const char *name, u_int irq, driver_filter_t filter,
|
||||
|
||||
error = intr_event_add_handler(i->event, name, filter, handler, arg,
|
||||
intr_priority(flags), flags, cookiep);
|
||||
|
||||
if (error)
|
||||
return (error);
|
||||
i->pi_domain = domain;
|
||||
if (strcmp(name, "IPI") != 0) {
|
||||
CPU_ZERO(&i->pi_cpuset);
|
||||
CPU_COPY(&cpuset_domain[domain], &i->pi_cpuset);
|
||||
}
|
||||
mtx_lock(&intr_table_lock);
|
||||
intrcnt_setname(i->event->ie_fullname, i->cntindex);
|
||||
mtx_unlock(&intr_table_lock);
|
||||
@ -551,7 +560,7 @@ powerpc_setup_intr(const char *name, u_int irq, driver_filter_t filter,
|
||||
PIC_CONFIG(i->pic, i->intline, i->trig, i->pol);
|
||||
|
||||
if (i->pic == root_pic)
|
||||
PIC_BIND(i->pic, i->intline, i->cpu, &i->priv);
|
||||
PIC_BIND(i->pic, i->intline, i->pi_cpuset, &i->priv);
|
||||
|
||||
if (enable)
|
||||
PIC_ENABLE(i->pic, i->intline, i->vector,
|
||||
|
@ -182,6 +182,15 @@ cpu_mp_start(void)
|
||||
pc->pc_bsp = 1;
|
||||
}
|
||||
pc->pc_hwref = cpu.cr_hwref;
|
||||
|
||||
if (vm_ndomains > 1)
|
||||
pc->pc_domain = cpu.cr_domain;
|
||||
else
|
||||
pc->pc_domain = 0;
|
||||
|
||||
CPU_SET(pc->pc_cpuid, &cpuset_domain[pc->pc_domain]);
|
||||
KASSERT(pc->pc_domain < MAXMEMDOM, ("bad domain value %d\n",
|
||||
pc->pc_domain));
|
||||
CPU_SET(pc->pc_cpuid, &all_cpus);
|
||||
next:
|
||||
error = platform_smp_next_cpu(&cpu);
|
||||
@ -205,7 +214,7 @@ cpu_mp_announce(void)
|
||||
pc = pcpu_find(i);
|
||||
if (pc == NULL)
|
||||
continue;
|
||||
printf("cpu%d: dev=%x", i, (int)pc->pc_hwref);
|
||||
printf("cpu%d: dev=%x domain=%d ", i, (int)pc->pc_hwref, pc->pc_domain);
|
||||
if (pc->pc_bsp)
|
||||
printf(" (BSP)");
|
||||
printf("\n");
|
||||
|
@ -38,11 +38,13 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/bus.h>
|
||||
#include <sys/kdb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/rman.h>
|
||||
#include <sys/smp.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
@ -67,6 +69,8 @@ static bus_teardown_intr_t nexus_teardown_intr;
|
||||
static bus_activate_resource_t nexus_activate_resource;
|
||||
static bus_deactivate_resource_t nexus_deactivate_resource;
|
||||
static bus_space_tag_t nexus_get_bus_tag(device_t, device_t);
|
||||
static int nexus_get_cpus(device_t, device_t, enum cpu_sets, size_t,
|
||||
cpuset_t *);
|
||||
#ifdef SMP
|
||||
static bus_bind_intr_t nexus_bind_intr;
|
||||
#endif
|
||||
@ -89,6 +93,7 @@ static device_method_t nexus_methods[] = {
|
||||
#endif
|
||||
DEVMETHOD(bus_config_intr, nexus_config_intr),
|
||||
DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag),
|
||||
DEVMETHOD(bus_get_cpus, nexus_get_cpus),
|
||||
|
||||
/* ofw_bus interface */
|
||||
DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr),
|
||||
@ -127,11 +132,13 @@ nexus_setup_intr(device_t bus __unused, device_t child, struct resource *r,
|
||||
int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg,
|
||||
void **cookiep)
|
||||
{
|
||||
int error;
|
||||
int error, domain;
|
||||
|
||||
if (r == NULL)
|
||||
panic("%s: NULL interrupt resource!", __func__);
|
||||
|
||||
if (cookiep != NULL)
|
||||
*cookiep = NULL;
|
||||
if ((rman_get_flags(r) & RF_SHAREABLE) == 0)
|
||||
flags |= INTR_EXCL;
|
||||
|
||||
@ -140,8 +147,13 @@ nexus_setup_intr(device_t bus __unused, device_t child, struct resource *r,
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (bus_get_domain(child, &domain) != 0) {
|
||||
if(bootverbose)
|
||||
device_printf(child, "no domain found\n");
|
||||
domain = 0;
|
||||
}
|
||||
error = powerpc_setup_intr(device_get_nameunit(child),
|
||||
rman_get_start(r), filt, intr, arg, flags, cookiep);
|
||||
rman_get_start(r), filt, intr, arg, flags, cookiep, domain);
|
||||
|
||||
return (error);
|
||||
}
|
||||
@ -164,6 +176,24 @@ nexus_get_bus_tag(device_t bus __unused, device_t child __unused)
|
||||
return(&bs_be_tag);
|
||||
}
|
||||
|
||||
static int
|
||||
nexus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
|
||||
cpuset_t *cpuset)
|
||||
{
|
||||
|
||||
switch (op) {
|
||||
#ifdef SMP
|
||||
case INTR_CPUS:
|
||||
if (setsize != sizeof(cpuset_t))
|
||||
return (EINVAL);
|
||||
*cpuset = all_cpus;
|
||||
return (0);
|
||||
#endif
|
||||
default:
|
||||
return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SMP
|
||||
static int
|
||||
nexus_bind_intr(device_t bus __unused, device_t child __unused,
|
||||
|
@ -48,13 +48,16 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_phys.h>
|
||||
|
||||
#include <machine/cpu.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/platform.h>
|
||||
#include <machine/platformvar.h>
|
||||
#include <machine/smp.h>
|
||||
#include <machine/vmparam.h>
|
||||
|
||||
#include "platform_if.h"
|
||||
|
||||
@ -67,9 +70,12 @@ static char plat_name[64] = "";
|
||||
SYSCTL_STRING(_hw, OID_AUTO, platform, CTLFLAG_RD | CTLFLAG_TUN,
|
||||
plat_name, 0, "Platform currently in use");
|
||||
|
||||
static struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1];
|
||||
static int vm_locality_table[MAXMEMDOM * MAXMEMDOM];
|
||||
static struct mem_region pregions[PHYS_AVAIL_SZ];
|
||||
static struct numa_mem_region numa_pregions[PHYS_AVAIL_SZ];
|
||||
static struct mem_region aregions[PHYS_AVAIL_SZ];
|
||||
static int npregions, naregions;
|
||||
static int nnumapregions, npregions, naregions;
|
||||
|
||||
/*
|
||||
* Memory region utilities: determine if two regions overlap,
|
||||
@ -112,6 +118,54 @@ mr_cmp(const void *a, const void *b)
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
numa_mem_regions(struct numa_mem_region **phys, int *physsz)
|
||||
{
|
||||
struct mem_affinity *mi;
|
||||
int i, j, maxdom, ndomain, offset;
|
||||
|
||||
nnumapregions = 0;
|
||||
PLATFORM_NUMA_MEM_REGIONS(plat_obj, numa_pregions, &nnumapregions);
|
||||
|
||||
if (physsz != NULL)
|
||||
*physsz = nnumapregions;
|
||||
if (phys != NULL)
|
||||
*phys = numa_pregions;
|
||||
if (physsz == NULL || phys == NULL) {
|
||||
printf("unset value\n");
|
||||
return;
|
||||
}
|
||||
maxdom = 0;
|
||||
for (i = 0; i < nnumapregions; i++)
|
||||
if (numa_pregions[i].mr_domain > maxdom)
|
||||
maxdom = numa_pregions[i].mr_domain;
|
||||
|
||||
mi = mem_info;
|
||||
for (i = 0; i < nnumapregions; i++, mi++) {
|
||||
mi->start = numa_pregions[i].mr_start;
|
||||
mi->end = numa_pregions[i].mr_start + numa_pregions[i].mr_size;
|
||||
mi->domain = numa_pregions[i].mr_domain;
|
||||
}
|
||||
offset = 0;
|
||||
vm_locality_table[offset] = 10;
|
||||
ndomain = maxdom + 1;
|
||||
if (ndomain > 1) {
|
||||
for (i = 0; i < ndomain; i++) {
|
||||
for (j = 0; j < ndomain; j++) {
|
||||
/*
|
||||
* Not sure what these values should actually be
|
||||
*/
|
||||
if (i == j)
|
||||
vm_locality_table[offset] = 10;
|
||||
else
|
||||
vm_locality_table[offset] = 21;
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
}
|
||||
vm_phys_register_domains(ndomain, mem_info, vm_locality_table);
|
||||
}
|
||||
|
||||
void
|
||||
mem_regions(struct mem_region **phys, int *physsz, struct mem_region **avail,
|
||||
int *availsz)
|
||||
@ -252,7 +306,7 @@ platform_smp_probe_threads(void)
|
||||
struct cpu_group *
|
||||
cpu_topo(void)
|
||||
{
|
||||
return (PLATFORM_SMP_TOPO(plat_obj));
|
||||
return (PLATFORM_SMP_TOPO(plat_obj));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -130,6 +130,22 @@ METHOD void mem_regions {
|
||||
int *_availsz;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Return the system's physical memory map.
|
||||
*
|
||||
* It shall provide the total RAM with the corresponding domains.
|
||||
*
|
||||
* @param _memp Array of physical memory chunks
|
||||
* @param _memsz Number of physical memory chunks
|
||||
*/
|
||||
|
||||
METHOD void numa_mem_regions {
|
||||
platform_t _plat;
|
||||
struct numa_mem_region *_memp;
|
||||
int *_memsz;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Return the maximum address accessible in real mode
|
||||
* (for use with hypervisors)
|
||||
|
Loading…
Reference in New Issue
Block a user