Back pcpu zone with domain-correct pages

- Change pcpu zone consumers to use a stride size of PAGE_SIZE.
  (defined as UMA_PCPU_ALLOC_SIZE to make future identification easier)

- Allocate pages from the correct domain for a given CPU.

- Don't initialize pc_domain to a non-zero value if NUMA is not defined.
  There are some misconceptions surrounding this field. It is the
  _VM_ NUMA domain and should only ever correspond to valid domain
  values as understood by the VM.

The former slab size of sizeof(struct pcpu) was somewhat arbitrary.
The new value is PAGE_SIZE because that is the smallest granularity at
which the VM can allocate a slab for a given domain. If you have fewer
than PAGE_SIZE/8 counters on your system, some memory will be wasted,
but this is obviously a case where you want the cache line to come from
the correct domain.
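
To make the stride concrete, here is a sketch (illustrative, not code
from this commit; pcpu_read64() is a hypothetical name) of how a
consumer now reaches a given CPU's copy of a pcpu-zone item:

	/* Read CPU "cpu"'s copy of a 64-bit item from a pcpu zone. */
	static inline uint64_t
	pcpu_read64(uint64_t *base, int cpu)
	{
		/* Copies now sit UMA_PCPU_ALLOC_SIZE (PAGE_SIZE) apart. */
		return (*(uint64_t *)((char *)base + UMA_PCPU_ALLOC_SIZE * cpu));
	}

With 4KB pages and 8-byte counters that is 4096 / 8 = 512 counters per
CPU page, hence the PAGE_SIZE/8 figure above.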

Reviewed by: jeff
Sponsored by: Limelight Networks
Differential Revision:  https://reviews.freebsd.org/D15933
Matt Macy 2018-07-06 02:06:03 +00:00
parent 88171893e2
commit ab3059a8e7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=336020
13 changed files with 135 additions and 32 deletions

View File

@@ -45,7 +45,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -65,7 +65,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -47,7 +47,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (atomic_load_64((uint64_t *)((char *)p + sizeof(struct pcpu) *
+	return (atomic_load_64((uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE *
 	    cpu)));
 }
 
@@ -68,7 +68,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	atomic_store_64((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	atomic_store_64((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid)), 0);
 }

View File

@@ -44,7 +44,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -64,7 +64,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -104,13 +104,13 @@ counter_u64_fetch_inline(uint64_t *p)
 		critical_enter();
 		CPU_FOREACH(i) {
 			res += *(uint64_t *)((char *)p +
-			    sizeof(struct pcpu) * i);
+			    UMA_PCPU_ALLOC_SIZE * i);
 		}
 		critical_exit();
 	} else {
 		CPU_FOREACH(i)
 			res += counter_u64_read_one_8b((uint64_t *)((char *)p +
-			    sizeof(struct pcpu) * i));
+			    UMA_PCPU_ALLOC_SIZE * i));
 	}
 
 	return (res);
 }
 
@@ -137,7 +137,7 @@ counter_u64_zero_one_cpu(void *arg)
 {
 	uint64_t *p;
 
-	p = (uint64_t *)((char *)arg + sizeof(struct pcpu) * PCPU_GET(cpuid));
+	p = (uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE * PCPU_GET(cpuid));
 	counter_u64_zero_one_8b(p);
 }
 
@@ -149,7 +149,7 @@ counter_u64_zero_inline(counter_u64_t c)
 	if ((cpu_feature & CPUID_CX8) == 0) {
 		critical_enter();
 		CPU_FOREACH(i)
-			*(uint64_t *)((char *)c + sizeof(struct pcpu) * i) = 0;
+			*(uint64_t *)((char *)c + UMA_PCPU_ALLOC_SIZE * i) = 0;
 		critical_exit();
 	} else {
 		smp_rendezvous(smp_no_rendezvous_barrier,

View File

@@ -42,7 +42,16 @@ __FBSDID("$FreeBSD$");
 
 #define IN_SUBR_COUNTER_C
 #include <sys/counter.h>
 
+static void
+counter_u64_zero_sync(counter_u64_t c)
+{
+	int cpu;
+
+	CPU_FOREACH(cpu)
+		*(uint64_t*)zpcpu_get_cpu(c, cpu) = 0;
+}
+
 void
 counter_u64_zero(counter_u64_t c)
 {
@@ -64,7 +73,7 @@ counter_u64_alloc(int flags)
 
 	r = uma_zalloc_pcpu(pcpu_zone_64, flags);
 	if (r != NULL)
-		counter_u64_zero(r);
+		counter_u64_zero_sync(r);
 
 	return (r);
 }

View File

@@ -47,7 +47,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -68,7 +68,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -50,7 +50,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -70,7 +70,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }
 
@@ -113,7 +113,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -134,7 +134,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -46,7 +46,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -67,7 +67,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -47,7 +47,7 @@ static inline uint64_t
 counter_u64_read_one(uint64_t *p, int cpu)
 {
 
-	return (*(uint64_t *)((char *)p + sizeof(struct pcpu) * cpu));
+	return (*(uint64_t *)((char *)p + UMA_PCPU_ALLOC_SIZE * cpu));
 }
 
 static inline uint64_t
@@ -68,7 +68,7 @@ static void
 counter_u64_zero_one_cpu(void *arg)
 {
 
-	*((uint64_t *)((char *)arg + sizeof(struct pcpu) *
+	*((uint64_t *)((char *)arg + UMA_PCPU_ALLOC_SIZE *
 	    PCPU_GET(cpuid))) = 0;
 }

View File

@@ -207,19 +207,21 @@ extern struct pcpu *cpuid_to_pcpu[];
 #endif
 #define	curvidata	PCPU_GET(vidata)
 
+#define	UMA_PCPU_ALLOC_SIZE		PAGE_SIZE
+
 /* Accessor to elements allocated via UMA_ZONE_PCPU zone. */
 static inline void *
 zpcpu_get(void *base)
 {
 
-	return ((char *)(base) + sizeof(struct pcpu) * curcpu);
+	return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * curcpu);
 }
 
 static inline void *
 zpcpu_get_cpu(void *base, int cpu)
 {
 
-	return ((char *)(base) + sizeof(struct pcpu) * cpu);
+	return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * cpu);
 }
 
 /*
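
A usage sketch for the accessors above (illustrative; "item" is assumed
to come from a UMA_ZONE_PCPU zone):

	critical_enter();			/* keep curcpu stable */
	*(uint64_t *)zpcpu_get(item) = 0;	/* this CPU's page-strided slot */
	critical_exit();

zpcpu_get() indexes by curcpu, so callers must prevent migration (a
critical section or a pinned thread) while the pointer is in use;
zpcpu_get_cpu() lets a cross-CPU reader walk every slot instead.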

View File

@@ -279,8 +279,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
 					 * mini-dumps.
 					 */
 #define	UMA_ZONE_PCPU		0x8000	/*
-					 * Allocates mp_maxid + 1 slabs sized to
-					 * sizeof(struct pcpu).
+					 * Allocates mp_maxid + 1 slabs of PAGE_SIZE
 					 */
 #define	UMA_ZONE_NUMA		0x10000	/*
 					 * NUMA aware Zone.  Implements a best
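
A sketch of creating and using such a zone (illustrative: the zone name
and item size are made up, and the manual zeroing mirrors what
subr_counter.c does after uma_zalloc_pcpu()):

	uma_zone_t pz;
	uint64_t *v;
	int cpu;

	pz = uma_zcreate("example pcpu", sizeof(uint64_t), NULL, NULL, NULL,
	    NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
	v = uma_zalloc_pcpu(pz, M_WAITOK);
	CPU_FOREACH(cpu)
		*(uint64_t *)zpcpu_get_cpu(v, cpu) = 0;

The returned pointer is a base address; with this commit each CPU's copy
sits UMA_PCPU_ALLOC_SIZE (PAGE_SIZE) bytes further along, backed by a
page from that CPU's memory domain.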

View File

@@ -229,8 +229,10 @@ void uma_startup2(void);
 static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
+static void *pcpu_page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);
 static void page_free(void *, vm_size_t, uint8_t);
+static void pcpu_page_free(void *, vm_size_t, uint8_t);
 static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int);
 static void cache_drain(uma_zone_t);
 static void bucket_drain(uma_zone_t, uma_bucket_t);
@@ -1172,6 +1174,58 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
 	return (p);
 }
 
+static void *
+pcpu_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
+    int wait)
+{
+	struct pglist alloctail;
+	vm_offset_t addr, zkva;
+	int cpu, flags;
+	vm_page_t p, p_next;
+#ifdef NUMA
+	struct pcpu *pc;
+#endif
+
+	TAILQ_INIT(&alloctail);
+	MPASS(bytes == (mp_maxid+1)*PAGE_SIZE);
+	*pflag = UMA_SLAB_KERNEL;
+	flags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ |
+	    ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK :
+	    VM_ALLOC_NOWAIT);
+	for (cpu = 0; cpu <= mp_maxid; cpu++) {
+		if (CPU_ABSENT(cpu)) {
+			p = vm_page_alloc(NULL, 0, flags);
+		} else {
+#ifndef NUMA
+			p = vm_page_alloc(NULL, 0, flags);
+#else
+			pc = pcpu_find(cpu);
+			p = vm_page_alloc_domain(NULL, 0, pc->pc_domain, flags);
+			if (__predict_false(p == NULL))
+				p = vm_page_alloc(NULL, 0, flags);
+#endif
+		}
+		if (__predict_false(p == NULL))
+			goto fail;
+		TAILQ_INSERT_TAIL(&alloctail, p, listq);
+	}
+	if ((addr = kva_alloc(bytes)) == 0)
+		goto fail;
+	zkva = addr;
+	TAILQ_FOREACH(p, &alloctail, listq) {
+		pmap_qenter(zkva, &p, 1);
+		zkva += PAGE_SIZE;
+	}
+	return ((void*)addr);
+fail:
+	TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
+		vm_page_unwire(p, PQ_NONE);
+		vm_page_free(p);
+	}
+	return (NULL);
+}
+
 /*
  * Allocates a number of pages from within an object
  *
@@ -1257,6 +1311,37 @@ page_free(void *mem, vm_size_t size, uint8_t flags)
 	kmem_free(vmem, (vm_offset_t)mem, size);
 }
 
+/*
+ * Frees pcpu zone allocations
+ *
+ * Arguments:
+ *	mem   A pointer to the memory to be freed
+ *	size  The size of the memory being freed
+ *	flags The original p->us_flags field
+ *
+ * Returns:
+ *	Nothing
+ */
+static void
+pcpu_page_free(void *mem, vm_size_t size, uint8_t flags)
+{
+	vm_offset_t sva, curva;
+	vm_paddr_t paddr;
+	vm_page_t m;
+
+	MPASS(size == (mp_maxid+1)*PAGE_SIZE);
+	sva = (vm_offset_t)mem;
+	for (curva = sva; curva < sva + size; curva += PAGE_SIZE) {
+		paddr = pmap_kextract(curva);
+		m = PHYS_TO_VM_PAGE(paddr);
+		vm_page_unwire(m, PQ_NONE);
+		vm_page_free(m);
+	}
+	pmap_qremove(sva, size >> PAGE_SHIFT);
+	kva_free(sva, size);
+}
+
 /*
  * Zero fill initializer
  *
@@ -1290,9 +1375,8 @@ keg_small_init(uma_keg_t keg)
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
 
-		slabsize = sizeof(struct pcpu);
-		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
-		    PAGE_SIZE);
+		slabsize = UMA_PCPU_ALLOC_SIZE;
+		keg->uk_ppera = ncpus;
 	} else {
 		slabsize = UMA_SLAB_SIZE;
 		keg->uk_ppera = 1;
@@ -1311,7 +1395,7 @@ keg_small_init(uma_keg_t keg)
 	keg->uk_rsize = rsize;
 
 	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
-	    keg->uk_rsize < sizeof(struct pcpu),
+	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
@@ -1529,6 +1613,8 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	else if (keg->uk_ppera == 1)
 		keg->uk_allocf = uma_small_alloc;
 #endif
+	else if (keg->uk_flags & UMA_ZONE_PCPU)
+		keg->uk_allocf = pcpu_page_alloc;
 	else
 		keg->uk_allocf = page_alloc;
 
#ifdef UMA_MD_SMALL_ALLOC
@@ -1536,6 +1622,9 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 		keg->uk_freef = uma_small_free;
 	else
 #endif
+	if (keg->uk_flags & UMA_ZONE_PCPU)
+		keg->uk_freef = pcpu_page_free;
+	else
 		keg->uk_freef = page_free;
 
 	/*
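
The keg_small_init() change makes pcpu slab sizing easy to reason about;
an illustrative helper (not in the tree) showing the resulting numbers,
assuming a 4KB PAGE_SIZE:

	/* Items that fit in each CPU's page of a pcpu slab. */
	static u_int
	pcpu_ipers(u_int rsize)
	{
		/* uk_ppera is now mp_maxid + 1: one page per CPU slot. */
		return (UMA_PCPU_ALLOC_SIZE / rsize);	/* 4096 / 8 == 512 */
	}

So a 64-bit counter keg packs 512 items per slab, each slab consuming
one physical page per possible CPU, allocated domain-correctly by
pcpu_page_alloc() and released by pcpu_page_free().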

View File

@@ -532,11 +532,15 @@ srat_set_cpus(void *dummy)
 		if (!cpu->enabled)
 			panic("SRAT: CPU with APIC ID %u is not known",
 			    pc->pc_apic_id);
+#ifdef NUMA
 		pc->pc_domain = cpu->domain;
-		CPU_SET(i, &cpuset_domain[cpu->domain]);
+#else
+		pc->pc_domain = 0;
+#endif
+		CPU_SET(i, &cpuset_domain[pc->pc_domain]);
 		if (bootverbose)
 			printf("SRAT: CPU %u has memory domain %d\n", i,
-			    cpu->domain);
+			    pc->pc_domain);
 	}
 
 	/* Last usage of the cpus array, unmap it. */