i386: Do not ever store to other-CPU counter64 slot.

On CPUs supporting cmpxchg8b, fetch is performed by cmpxchg8b on the
corresponding CPU slot, which unconditionally writes to the slot.  If
the owner CPU increments that slot at the same time, both CPUs might
run the cmpxchg8b instruction concurrently, and the race could
overwrite the incremented value.  So the counter update would be lost.

Fix it by implementing fetch as IPI and accumulation of result.  It is
acceptable for rare counter64 fetch operation to be more expensive.

Diagnosed and tested by:	Andreas Longwitz <longwitz@incore.de>
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
This commit is contained in:
kib 2019-02-03 21:28:58 +00:00
parent 93cad7dba5
commit 38b15fd590

View File

@ -72,7 +72,12 @@ counter_64_inc_8b(uint64_t *p, int64_t inc)
}
#ifdef IN_SUBR_COUNTER_C
static inline uint64_t
/*
 * Argument for the smp_rendezvous(9) callback used to fetch a counter:
 * "p" is the counter's per-CPU base address and "res" accumulates the
 * per-CPU slot values read on each CPU.
 */
struct counter_u64_fetch_cx8_arg {
uint64_t res;
uint64_t *p;
};
static uint64_t
counter_u64_read_one_8b(uint64_t *p)
{
uint32_t res_lo, res_high;
@ -87,9 +92,22 @@ counter_u64_read_one_8b(uint64_t *p)
return (res_lo + ((uint64_t)res_high << 32));
}
/*
 * Per-CPU rendezvous callback (run on each CPU by smp_rendezvous()):
 * read this CPU's 64-bit slot of the counter with
 * counter_u64_read_one_8b() and atomically fold the value into the
 * shared accumulator.  Reading the slot only on its owner CPU avoids
 * the cross-CPU cmpxchg8b store that could lose a concurrent update.
 */
static void
counter_u64_fetch_cx8_one(void *arg1)
{
	struct counter_u64_fetch_cx8_arg *fa;
	uint64_t *slot;

	fa = arg1;
	slot = (uint64_t *)((char *)fa->p +
	    UMA_PCPU_ALLOC_SIZE * PCPU_GET(cpuid));
	atomic_add_64(&fa->res, counter_u64_read_one_8b(slot));
}
static inline uint64_t
counter_u64_fetch_inline(uint64_t *p)
{
struct counter_u64_fetch_cx8_arg arg;
uint64_t res;
int i;
@ -108,9 +126,10 @@ counter_u64_fetch_inline(uint64_t *p)
}
critical_exit();
} else {
CPU_FOREACH(i)
res += counter_u64_read_one_8b((uint64_t *)((char *)p +
UMA_PCPU_ALLOC_SIZE * i));
arg.p = p;
arg.res = 0;
smp_rendezvous(NULL, counter_u64_fetch_cx8_one, NULL, &arg);
res = arg.res;
}
return (res);
}