powernv: Include NUMA locality information in the CPU topology
ULE uses this topology to try to preserve locality when migrating threads between CPUs and when performing work stealing. Ensure that on NUMA systems it will at least take the NUMA topology into account.
Reviewed by: bdragon, jhibbits (previous version)
Tested by: bdragon
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D28580
This commit is contained in:
parent
b8ae450f05
commit
cb5f8694a5
@@ -480,22 +480,72 @@ powernv_smp_probe_threads(platform_t plat)
|
||||
mp_ncores = mp_ncpus / nthreads;
|
||||
}
|
||||
|
||||
static struct cpu_group *
|
||||
cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
|
||||
const cpuset_t *cpus, int children, int level, int flags)
|
||||
{
|
||||
struct cpu_group *child;
|
||||
|
||||
child = children != 0 ? smp_topo_alloc(children) : NULL;
|
||||
|
||||
group->cg_parent = parent;
|
||||
group->cg_child = child;
|
||||
CPU_COPY(cpus, &group->cg_mask);
|
||||
group->cg_count = CPU_COUNT(cpus);
|
||||
group->cg_children = children;
|
||||
group->cg_level = level;
|
||||
group->cg_flags = flags;
|
||||
|
||||
return (child);
|
||||
}
|
||||
|
||||
static struct cpu_group *
|
||||
powernv_smp_topo(platform_t plat)
|
||||
{
|
||||
struct cpu_group *core, *dom, *root;
|
||||
cpuset_t corecpus, domcpus;
|
||||
int cpuid, i, j, k, ncores;
|
||||
|
||||
if (mp_ncpus % smp_threads_per_core != 0) {
|
||||
printf("WARNING: Irregular SMP topology. Performance may be "
|
||||
"suboptimal (%d threads, %d on first core)\n",
|
||||
mp_ncpus, smp_threads_per_core);
|
||||
printf("%s: irregular SMP topology (%d threads, %d per core)\n",
|
||||
__func__, mp_ncpus, smp_threads_per_core);
|
||||
return (smp_topo_none());
|
||||
}
|
||||
|
||||
/* Don't do anything fancier for non-threaded SMP */
|
||||
if (smp_threads_per_core == 1)
|
||||
return (smp_topo_none());
|
||||
root = smp_topo_alloc(1);
|
||||
dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
|
||||
0);
|
||||
|
||||
return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
|
||||
CG_FLAG_SMT));
|
||||
/*
|
||||
* Redundant layers will be collapsed by the caller so we don't need a
|
||||
* special case for a single domain.
|
||||
*/
|
||||
for (i = 0; i < vm_ndomains; i++, dom++) {
|
||||
CPU_COPY(&cpuset_domain[i], &domcpus);
|
||||
ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
|
||||
KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
|
||||
("%s: domain %d core count not divisible by thread count",
|
||||
__func__, i));
|
||||
|
||||
core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
|
||||
0);
|
||||
for (j = 0; j < ncores; j++, core++) {
|
||||
/*
|
||||
* Assume that consecutive CPU IDs correspond to sibling
|
||||
* threads.
|
||||
*/
|
||||
CPU_ZERO(&corecpus);
|
||||
for (k = 0; k < smp_threads_per_core; k++) {
|
||||
cpuid = CPU_FFS(&domcpus) - 1;
|
||||
CPU_CLR(cpuid, &domcpus);
|
||||
CPU_SET(cpuid, &corecpus);
|
||||
}
|
||||
(void)cpu_group_init(core, dom, &corecpus, 0,
|
||||
CG_SHARE_L1, CG_FLAG_SMT);
|
||||
}
|
||||
}
|
||||
|
||||
return (root);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user