Merge from projects/counters: UMA_ZONE_PCPU zones.

These zones have slab size == sizeof(struct pcpu), but request enough pages
from the VM to fit (uk_slabsize * mp_ncpus). An item allocated from such a
zone has a separate twin for each CPU in the system, and these twins lie at
a distance of sizeof(struct pcpu) bytes from each other. This magic stride
will allow us to make some optimizations later.

  To address the private item of a particular CPU, simple arithmetic is used:

  item = (type *)((char *)base + sizeof(struct pcpu) * curcpu)

  This arithmetic is available as the zpcpu_get() macro in pcpu.h.
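
  For illustration, here is a minimal sketch of what that arithmetic amounts
to, written as a hypothetical inline helper rather than the real macro from
pcpu.h (the name zpcpu_get_sketch() is invented here); the caller must keep
curcpu stable, e.g. inside a critical section, while the pointer is in use:

  /*
   * Sketch only (the real macro lives in pcpu.h): each CPU's private
   * twin of a UMA_ZONE_PCPU allocation lives at
   * base + curcpu * sizeof(struct pcpu).
   */
  #include <sys/param.h>
  #include <sys/pcpu.h>

  static inline void *
  zpcpu_get_sketch(void *base)
  {

          return ((char *)base + sizeof(struct pcpu) * curcpu);
  }

  The manual page hunk below shows the intended usage pattern, bracketing
the access with critical_enter()/critical_exit().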

  To introduce non-page-size slabs, a new field, uk_slabsize, has been added
to uma_keg. This shifted some frequently used fields of uma_keg to the fourth
cache line on amd64. To mitigate this pessimization, the uma_keg fields were
rearranged a bit: the least frequently used ones, uk_name and uk_link, were
moved down to the fourth cache line, while all other frequently dereferenced
fields fit into the first three cache lines.

Sponsored by:	Nginx, Inc.
Gleb Smirnoff 2013-04-08 19:10:45 +00:00
parent dad1421650
commit ad97af7ebd
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=249264
4 changed files with 85 additions and 38 deletions


@@ -153,6 +153,23 @@ See
.Fn uma_find_refcnt .
.It Dv UMA_ZONE_NODUMP
Pages belonging to the zone will not be included into mini-dumps.
.It Dv UMA_ZONE_PCPU
An allocation from zone would have
.Va mp_ncpu
shadow copies, that are privately assigned to CPUs.
A CPU can address its private copy using base allocation address plus
multiple of current CPU id and
.Fn sizeof "struct pcpu" :
.Bd -literal -offset indent
foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
...
foo_base = uma_zalloc(foo_zone, ...);
...
critical_enter();
foo_pcpu = (foo_t *)zpcpu_get(foo_base);
/* do something with foo_pcpu */
critical_exit();
.Ed
.It Dv UMA_ZONE_OFFPAGE
By default book-keeping of items within a slab is done in the slab page itself.
This flag explicitly tells subsystem that book-keeping structure should be


@@ -252,6 +252,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
* Zone's pages will not be included in
* mini-dumps.
*/
#define UMA_ZONE_PCPU 0x8000 /*
* Allocates mp_ncpus slabs sized to
* sizeof(struct pcpu).
*/
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -260,7 +264,7 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
*/
#define UMA_ZONE_INHERIT \
(UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \
UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB)
UMA_ZONE_HASH | UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU)
/* Definitions for align */
#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */


@@ -765,9 +765,9 @@ keg_drain(uma_keg_t keg)
SKIP_NONE, ZFREE_STATFREE);
#ifdef UMA_DEBUG
printf("%s: Returning %d bytes.\n",
keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
keg->uk_name, PAGE_SIZE * keg->uk_ppera);
#endif
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
}
@@ -865,7 +865,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
wait |= M_NODUMP;
/* zone is passed for legacy reasons. */
mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab, NULL,
@@ -927,7 +927,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
zone_free_item(keg->uk_slabzone, slab,
NULL, SKIP_NONE, ZFREE_STATFREE);
keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera,
flags);
KEG_LOCK(keg);
return (NULL);
@@ -1138,16 +1138,27 @@ keg_small_init(uma_keg_t keg)
u_int wastedspace;
u_int shsize;
KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
if (keg->uk_flags & UMA_ZONE_PCPU) {
keg->uk_slabsize = sizeof(struct pcpu);
keg->uk_ppera = howmany(mp_ncpus * sizeof(struct pcpu),
PAGE_SIZE);
} else {
keg->uk_slabsize = UMA_SLAB_SIZE;
keg->uk_ppera = 1;
}
rsize = keg->uk_size;
if (rsize < UMA_SMALLEST_UNIT)
rsize = UMA_SMALLEST_UNIT;
if (rsize & keg->uk_align)
rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
if (rsize < keg->uk_slabsize / 256)
rsize = keg->uk_slabsize / 256;
keg->uk_rsize = rsize;
keg->uk_ppera = 1;
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
keg->uk_rsize < sizeof(struct pcpu),
("%s: size %u too large", __func__, keg->uk_rsize));
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
shsize = 0;
@@ -1159,10 +1170,12 @@ keg_small_init(uma_keg_t keg)
shsize = sizeof(struct uma_slab);
}
keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize;
KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
memused = keg->uk_ipers * rsize + shsize;
wastedspace = UMA_SLAB_SIZE - memused;
wastedspace = keg->uk_slabsize - memused;
/*
* We can't do OFFPAGE if we're internal or if we've been
@@ -1175,24 +1188,26 @@ keg_small_init(uma_keg_t keg)
(keg->uk_flags & UMA_ZFLAG_CACHEONLY))
return;
if ((wastedspace >= UMA_MAX_WASTE) &&
(keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
KASSERT(keg->uk_ipers <= 255,
("keg_small_init: keg->uk_ipers too high!"));
if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) &&
(keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) {
keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize;
KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 255,
("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
printf("UMA decided we need offpage slab headers for "
"keg: %s, calculated wastedspace = %d, "
"maximum wasted space allowed = %d, "
"calculated ipers = %d, "
"new wasted space = %d\n", keg->uk_name, wastedspace,
UMA_MAX_WASTE, keg->uk_ipers,
UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers,
keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
keg->uk_flags |= UMA_ZONE_OFFPAGE;
if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
keg->uk_flags |= UMA_ZONE_HASH;
}
if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
(keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
keg->uk_flags |= UMA_ZONE_HASH;
}
/*
@@ -1209,19 +1224,15 @@ keg_small_init(uma_keg_t keg)
static void
keg_large_init(uma_keg_t keg)
{
int pages;
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
pages = keg->uk_size / UMA_SLAB_SIZE;
/* Account for remainder */
if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
pages++;
keg->uk_ppera = pages;
keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE;
keg->uk_ipers = 1;
keg->uk_rsize = keg->uk_size;
@@ -1242,6 +1253,9 @@ keg_cachespread_init(uma_keg_t keg)
int pages;
int rsize;
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
alignsize = keg->uk_align + 1;
rsize = keg->uk_size;
/*
@@ -1259,6 +1273,7 @@ keg_cachespread_init(uma_keg_t keg)
pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
keg->uk_rsize = rsize;
keg->uk_ppera = pages;
keg->uk_slabsize = UMA_SLAB_SIZE;
keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
KASSERT(keg->uk_ipers <= uma_max_ipers,
@@ -1308,6 +1323,13 @@ keg_ctor(void *mem, int size, void *udata, int flags)
if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
keg->uk_flags |= UMA_ZONE_VTOSLAB;
if (arg->flags & UMA_ZONE_PCPU)
#ifdef SMP
keg->uk_flags |= UMA_ZONE_OFFPAGE;
#else
keg->uk_flags &= ~UMA_ZONE_PCPU;
#endif
/*
* The +UMA_FRITM_SZ added to uk_size is to account for the
* linkage that is added to the size in keg_small_init(). If
@@ -1385,7 +1407,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
if (totsize & UMA_ALIGN_PTR)
totsize = (totsize & ~UMA_ALIGN_PTR) +
(UMA_ALIGN_PTR + 1);
keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
if (keg->uk_flags & UMA_ZONE_REFCNT)
totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
@@ -1401,7 +1423,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
* mathematically possible for all cases, so we make
* sure here anyway.
*/
if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
if (totsize > PAGE_SIZE * keg->uk_ppera) {
printf("zone %s ipers %d rsize %d size %d\n",
zone->uz_name, keg->uk_ipers, keg->uk_rsize,
keg->uk_size);
@@ -1676,7 +1698,8 @@ uma_startup(void *bootmem, int boot_pages)
* that we need to go to offpage slab headers. Or, if we do,
* then we trap that condition below and panic in the INVARIANTS case.
*/
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) -
(UMA_SLAB_SIZE / UMA_MAX_WASTE);
totsize = wsize;
objsize = UMA_SMALLEST_UNIT;
while (totsize >= wsize) {
@@ -1689,7 +1712,8 @@ uma_startup(void *bootmem, int boot_pages)
objsize--;
uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) -
(UMA_SLAB_SIZE / UMA_MAX_WASTE);
totsize = wsize;
objsize = UMA_SMALLEST_UNIT;
while (totsize >= wsize) {


@@ -120,8 +120,8 @@
#define UMA_BOOT_PAGES 64 /* Pages allocated for startup */
/* Max waste before going to off page slab management */
#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10)
/* Max waste percentage before going to off page slab management */
#define UMA_MAX_WASTE 10
/*
* I doubt there will be many cases where this is exceeded. This is the initial
@@ -197,12 +197,9 @@ typedef struct uma_cache * uma_cache_t;
*
*/
struct uma_keg {
LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
struct mtx uk_lock; /* Lock for the keg */
struct uma_hash uk_hash;
const char *uk_name; /* Name of creating zone. */
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */
LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */
@@ -225,10 +222,15 @@ struct uma_keg {
vm_offset_t uk_kva; /* Zone base KVA */
uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */
u_int16_t uk_slabsize; /* Slab size for this keg */
u_int16_t uk_pgoff; /* Offset to uma_slab struct */
u_int16_t uk_ppera; /* pages per allocation from backend */
u_int16_t uk_ipers; /* Items per slab */
u_int32_t uk_flags; /* Internal flags */
/* Least used fields go to the last cache line. */
const char *uk_name; /* Name of creating zone. */
LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */
};
typedef struct uma_keg * uma_keg_t;