Replace a number of conflations of mp_ncpus and mp_maxid with either
mp_maxid or CPU_FOREACH() as appropriate.

This fixes a number of places in the kernel that assumed CPU IDs are dense
in [0, mp_ncpus) and would try, for example, to run tasks on CPUs that did
not exist, or to allocate too few buffers on systems with sparse CPU IDs,
where there are holes in the ID range and mp_maxid >= mp_ncpus. Such
configurations generally occur on systems with SMT hardware on which SMT
has been disabled. This patch restores system operation at least on POWER8
systems configured in this way.

There are a number of other places in the kernel with potential problems
in these situations, but where sparse CPU IDs are not currently known to
occur, mostly in the ARM machine-dependent code. These will be fixed in a
follow-up commit after the stable/11 branch.

PR:             kern/210106
Reviewed by:    jhb
Approved by:    re (glebius)
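For illustration only, here is a minimal sketch (not part of this commit;
the function name is hypothetical) of the pattern the change moves these
call sites toward: size per-CPU storage by mp_maxid + 1 rather than
mp_ncpus, and walk CPUs with CPU_FOREACH(), which skips IDs that are
absent when the ID space is sparse.

    #include <sys/param.h>
    #include <sys/malloc.h>
    #include <sys/smp.h>

    /*
     * Sketch, assuming a POWER8-like layout: with SMT disabled on an
     * 8-thread-per-core machine the online CPU IDs might be 0, 8, 16, 24,
     * so mp_ncpus == 4 but mp_maxid == 24.  A loop bounded by mp_ncpus
     * would touch the absent IDs 1-3 and never reach CPUs 8, 16 and 24.
     */
    static void
    per_cpu_example(void)           /* hypothetical helper */
    {
            uint64_t *buf;
            int i;

            /* Size per-CPU storage by the largest possible ID, not the count. */
            buf = malloc(sizeof(*buf) * (mp_maxid + 1), M_TEMP,
                M_WAITOK | M_ZERO);

            /* Visit only CPUs that exist; i skips holes in the ID space. */
            CPU_FOREACH(i)
                    buf[i]++;

            free(buf, M_TEMP);
    }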
commit 96c85efb4b
parent c5390e5ab4

Modified paths:
    sys/amd64/include
    sys/cddl/compat/opensolaris/sys
    sys/dev/cpuctl
    sys/i386/include
    sys/kern
    sys/net
    sys/netinet
    sys/powerpc
    sys/vm
@@ -51,7 +51,7 @@ counter_u64_fetch_inline(uint64_t *p)
         int i;

         r = 0;
-        for (i = 0; i < mp_ncpus; i++)
+        CPU_FOREACH(i)
                 r += counter_u64_read_one((uint64_t *)p, i);

         return (r);
@@ -45,8 +45,8 @@
 #define CPU             curcpu
 #define minclsyspri     PRIBIO
 #define maxclsyspri     PVM
-#define max_ncpus       mp_ncpus
-#define boot_max_ncpus  mp_ncpus
+#define max_ncpus       (mp_maxid + 1)
+#define boot_max_ncpus  (mp_maxid + 1)

 #define TS_RUN          0

@@ -120,7 +120,7 @@ static void
 set_cpu(int cpu, struct thread *td)
 {

-        KASSERT(cpu >= 0 && cpu < mp_ncpus && cpu_enabled(cpu),
+        KASSERT(cpu >= 0 && cpu <= mp_maxid && cpu_enabled(cpu),
             ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
         thread_lock(td);
         sched_bind(td, cpu);
@@ -133,7 +133,7 @@ static void
 restore_cpu(int oldcpu, int is_bound, struct thread *td)
 {

-        KASSERT(oldcpu >= 0 && oldcpu < mp_ncpus && cpu_enabled(oldcpu),
+        KASSERT(oldcpu >= 0 && oldcpu <= mp_maxid && cpu_enabled(oldcpu),
             ("[cpuctl,%d]: bad cpu number %d", __LINE__, oldcpu));
         thread_lock(td);
         if (is_bound == 0)
@@ -150,7 +150,7 @@ cpuctl_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
         int ret;
         int cpu = dev2unit(dev);

-        if (cpu >= mp_ncpus || !cpu_enabled(cpu)) {
+        if (cpu > mp_maxid || !cpu_enabled(cpu)) {
                 DPRINTF("[cpuctl,%d]: bad cpu number %d\n", __LINE__, cpu);
                 return (ENXIO);
         }
@@ -201,7 +201,7 @@ cpuctl_do_cpuid_count(int cpu, cpuctl_cpuid_count_args_t *data,
         int is_bound = 0;
         int oldcpu;

-        KASSERT(cpu >= 0 && cpu < mp_ncpus,
+        KASSERT(cpu >= 0 && cpu <= mp_maxid,
             ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));

         /* Explicitly clear cpuid data to avoid returning stale info. */
@@ -245,7 +245,7 @@ cpuctl_do_msr(int cpu, cpuctl_msr_args_t *data, u_long cmd, struct thread *td)
         int oldcpu;
         int ret;

-        KASSERT(cpu >= 0 && cpu < mp_ncpus,
+        KASSERT(cpu >= 0 && cpu <= mp_maxid,
             ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));

         /*
@@ -296,7 +296,7 @@ cpuctl_do_update(int cpu, cpuctl_update_args_t *data, struct thread *td)
         char vendor[13];
         int ret;

-        KASSERT(cpu >= 0 && cpu < mp_ncpus,
+        KASSERT(cpu >= 0 && cpu <= mp_maxid,
             ("[cpuctl,%d]: bad cpu number %d", __LINE__, cpu));
         DPRINTF("[cpuctl,%d]: XXX %d", __LINE__, cpu);

@@ -512,7 +512,7 @@ cpuctl_open(struct cdev *dev, int flags, int fmt __unused, struct thread *td)
         int cpu;

         cpu = dev2unit(dev);
-        if (cpu >= mp_ncpus || !cpu_enabled(cpu)) {
+        if (cpu > mp_maxid || !cpu_enabled(cpu)) {
                 DPRINTF("[cpuctl,%d]: incorrect cpu number %d\n", __LINE__,
                     cpu);
                 return (ENXIO);
@@ -531,15 +531,15 @@ cpuctl_modevent(module_t mod __unused, int type, void *data __unused)
         case MOD_LOAD:
                 if (bootverbose)
                         printf("cpuctl: access to MSR registers/cpuid info.\n");
-                cpuctl_devs = malloc(sizeof(*cpuctl_devs) * mp_ncpus, M_CPUCTL,
+                cpuctl_devs = malloc(sizeof(*cpuctl_devs) * (mp_maxid + 1), M_CPUCTL,
                     M_WAITOK | M_ZERO);
-                for (cpu = 0; cpu < mp_ncpus; cpu++)
+                CPU_FOREACH(cpu)
                         if (cpu_enabled(cpu))
                                 cpuctl_devs[cpu] = make_dev(&cpuctl_cdevsw, cpu,
                                     UID_ROOT, GID_KMEM, 0640, "cpuctl%d", cpu);
                 break;
         case MOD_UNLOAD:
-                for (cpu = 0; cpu < mp_ncpus; cpu++) {
+                CPU_FOREACH(cpu) {
                         if (cpuctl_devs[cpu] != NULL)
                                 destroy_dev(cpuctl_devs[cpu]);
                 }
@@ -98,13 +98,13 @@ counter_u64_fetch_inline(uint64_t *p)
                  * critical section as well.
                  */
                 critical_enter();
-                for (i = 0; i < mp_ncpus; i++) {
+                CPU_FOREACH(i) {
                         res += *(uint64_t *)((char *)p +
                             sizeof(struct pcpu) * i);
                 }
                 critical_exit();
         } else {
-                for (i = 0; i < mp_ncpus; i++)
+                CPU_FOREACH(i)
                         res += counter_u64_read_one_8b((uint64_t *)((char *)p +
                             sizeof(struct pcpu) * i));
         }
@@ -144,7 +144,7 @@ counter_u64_zero_inline(counter_u64_t c)

         if ((cpu_feature & CPUID_CX8) == 0) {
                 critical_enter();
-                for (i = 0; i < mp_ncpus; i++)
+                CPU_FOREACH(i)
                         *(uint64_t *)((char *)c + sizeof(struct pcpu) * i) = 0;
                 critical_exit();
         } else {
@@ -248,7 +248,7 @@ dpcpu_copy(void *s, int size)
         uintptr_t dpcpu;
         int i;

-        for (i = 0; i < mp_ncpus; ++i) {
+        CPU_FOREACH(i) {
                 dpcpu = dpcpu_off[i];
                 if (dpcpu == 0)
                         continue;
@@ -289,7 +289,7 @@ sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS)
         int i;

         count = 0;
-        for (i = 0; i < mp_ncpus; ++i) {
+        CPU_FOREACH(i) {
                 dpcpu = dpcpu_off[i];
                 if (dpcpu == 0)
                         continue;
@@ -306,7 +306,7 @@ sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS)
         int i;

         count = 0;
-        for (i = 0; i < mp_ncpus; ++i) {
+        CPU_FOREACH(i) {
                 dpcpu = dpcpu_off[i];
                 if (dpcpu == 0)
                         continue;
@@ -323,7 +323,7 @@ sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS)
         int i;

         count = 0;
-        for (i = 0; i < mp_ncpus; ++i) {
+        CPU_FOREACH(i) {
                 dpcpu = dpcpu_off[i];
                 if (dpcpu == 0)
                         continue;
@@ -832,6 +832,7 @@ static void
 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
 {
         struct taskqgroup_cpu *qcpu;
+        int i, j;

         qcpu = &qgroup->tqg_queue[idx];
         LIST_INIT(&qcpu->tgc_tasks);
@@ -839,7 +840,15 @@ taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
         taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
             "%s_%d", qgroup->tqg_name, idx);
-        qcpu->tgc_cpu = idx * qgroup->tqg_stride;
+
+        for (i = CPU_FIRST(), j = 0; j < idx * qgroup->tqg_stride;
+            j++, i = CPU_NEXT(i)) {
+                /*
+                 * Wait: evaluate the idx * qgroup->tqg_stride'th CPU,
+                 * potentially wrapping the actual count
+                 */
+        }
+        qcpu->tgc_cpu = i;
 }

 static void
@@ -1017,13 +1026,14 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
         cpuset_t mask;
         struct grouptask *gtask;
-        int i, old_cnt, qid;
+        int i, k, old_cnt, qid, cpu;

         mtx_assert(&qgroup->tqg_lock, MA_OWNED);

         if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
-                printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
-                        cnt, stride, mp_ncpus, smp_started);
+                printf("taskqgroup_adjust failed cnt: %d stride: %d "
+                    "mp_ncpus: %d smp_started: %d\n", cnt, stride, mp_ncpus,
+                    smp_started);
                 return (EINVAL);
         }
         if (qgroup->tqg_adjusting) {
@@ -1081,8 +1091,11 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
         /*
          * Set new CPU and IRQ affinity
          */
+        cpu = CPU_FIRST();
         for (i = 0; i < cnt; i++) {
-                qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
+                qgroup->tqg_queue[i].tgc_cpu = cpu;
+                for (k = 0; k < qgroup->tqg_stride; k++)
+                        cpu = CPU_NEXT(cpu);
                 CPU_ZERO(&mask);
                 CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
@@ -746,7 +746,7 @@ flowtable_alloc(struct flowtable *ft)
                 ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);

         ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
-        for (int i = 0; i < mp_ncpus; i++) {
+        CPU_FOREACH(i) {
                 bitstr_t **b;

                 b = zpcpu_get_cpu(ft->ft_masks, i);
@@ -3848,7 +3848,7 @@ iflib_queues_alloc(if_ctx_t ctx)
         iflib_txq_t txq;
         iflib_rxq_t rxq;
         iflib_fl_t fl = NULL;
-        int i, j, err, txconf, rxconf, fl_ifdi_offset;
+        int i, j, cpu, err, txconf, rxconf, fl_ifdi_offset;
         iflib_dma_info_t ifdip;
         uint32_t *rxqsizes = sctx->isc_rxqsizes;
         uint32_t *txqsizes = sctx->isc_txqsizes;
@@ -3897,7 +3897,7 @@ iflib_queues_alloc(if_ctx_t ctx)
         /*
          * XXX handle allocation failure
          */
-        for (txconf = i = 0; i < ntxqsets; i++, txconf++, txq++) {
+        for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) {
                 /* Set up some basics */

                 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) {
@@ -3917,8 +3917,8 @@ iflib_queues_alloc(if_ctx_t ctx)
                 txq->ift_ctx = ctx;
                 txq->ift_id = i;
                 /* XXX fix this */
-                txq->ift_timer.c_cpu = i % mp_ncpus;
-                txq->ift_db_check.c_cpu = i % mp_ncpus;
+                txq->ift_timer.c_cpu = cpu;
+                txq->ift_db_check.c_cpu = cpu;
                 txq->ift_nbr = nbuf_rings;

                 if (iflib_txsd_alloc(txq)) {
@@ -275,10 +275,12 @@ ip_fillid(struct ip *ip)
 static void
 ipid_sysinit(void)
 {
+        int i;

         mtx_init(&V_ip_id_mtx, "ip_id_mtx", NULL, MTX_DEF);
         V_ip_id = counter_u64_alloc(M_WAITOK);
-        for (int i = 0; i < mp_ncpus; i++)
+
+        CPU_FOREACH(i)
                 arc4rand(zpcpu_get_cpu(V_ip_id, i), sizeof(uint64_t), 0);
 }
 VNET_SYSINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysinit, NULL);
@@ -54,7 +54,7 @@ counter_u64_fetch_inline(uint64_t *p)
         int i;

         r = 0;
-        for (i = 0; i < mp_ncpus; i++)
+        CPU_FOREACH(i)
                 r += counter_u64_read_one((uint64_t *)p, i);

         return (r);
@@ -113,20 +113,16 @@ cpu_mp_setmaxid(void)
         int error;

         mp_ncpus = 0;
+        mp_maxid = 0;
         error = platform_smp_first_cpu(&cpuref);
         while (!error) {
                 mp_ncpus++;
+                mp_maxid = max(cpuref.cr_cpuid, mp_maxid);
                 error = platform_smp_next_cpu(&cpuref);
         }
         /* Sanity. */
         if (mp_ncpus == 0)
                 mp_ncpus = 1;
-
-        /*
-         * Set the largest cpuid we're going to use. This is necessary
-         * for VM initialization.
-         */
-        mp_maxid = min(mp_ncpus, MAXCPU) - 1;
 }

 int
@@ -276,7 +276,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
                                          * mini-dumps.
                                          */
 #define UMA_ZONE_PCPU           0x8000  /*
-                                         * Allocates mp_ncpus slabs sized to
+                                         * Allocates mp_maxid + 1 slabs sized to
                                          * sizeof(struct pcpu).
                                          */

@@ -1227,7 +1227,7 @@ keg_small_init(uma_keg_t keg)
         u_int shsize;

         if (keg->uk_flags & UMA_ZONE_PCPU) {
-                u_int ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
+                u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;

                 keg->uk_slabsize = sizeof(struct pcpu);
                 keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
@@ -3265,9 +3265,10 @@ uma_large_free(uma_slab_t slab)
 static void
 uma_zero_item(void *item, uma_zone_t zone)
 {
+        int i;

         if (zone->uz_flags & UMA_ZONE_PCPU) {
-                for (int i = 0; i < mp_ncpus; i++)
+                CPU_FOREACH(i)
                         bzero(zpcpu_get_cpu(item, i), zone->uz_size);
         } else
                 bzero(item, zone->uz_size);