Handle large mallocs by going directly to kmem. Taking a detour through
UMA does not provide any additional value.

Reviewed by:	markj
Differential Revision:	https://reviews.freebsd.org/D22563
Author:	Jeff Roberson	2019-11-29 03:14:10 +00:00
parent 85b7bedb15
commit 6d6a03d7a8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=355203
5 changed files with 124 additions and 131 deletions
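
The core of the change is a small encoding trick in the new malloc_large() helpers in the first file below: rather than allocating a uma_slab structure to describe each large allocation, malloc_large() rounds the request up to a page multiple, allocates it with kmem_malloc_domainset(), and stores the size directly in the per-page slab pointer slot. A genuine slab pointer is always at least 2-byte aligned, so its low bit is free to act as a tag; the stored value is (size << 1) | 1, and free()/realloc() recover the size with a right shift. The following stand-alone sketch models that encoding; the helper and type names here are illustrative only, while the kernel versions are malloc_large_slab() and malloc_large_size() as shown in the diff.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * User-space model of the tag: a genuine uma_slab_t is pointer-aligned,
 * so its low bit is always clear.  Large allocations reuse the slab
 * slot to hold (size << 1) | 1 instead of a pointer.
 */
typedef void *slab_ptr_t;		/* stand-in for uma_slab_t */

static inline slab_ptr_t
large_encode(size_t sz)
{
	return ((slab_ptr_t)(uintptr_t)((sz << 1) | 1));
}

static inline bool
large_is_malloc(slab_ptr_t slab)	/* cf. malloc_large_slab() */
{
	return (((uintptr_t)slab & 1) != 0);
}

static inline size_t
large_size(slab_ptr_t slab)		/* cf. malloc_large_size() */
{
	return ((size_t)((uintptr_t)slab >> 1));
}

int
main(void)
{
	slab_ptr_t tag = large_encode(4 * 4096);	/* a 4-page request */

	printf("large: %d, size: %zu bytes\n",
	    large_is_malloc(tag), large_size(tag));
	return (0);
}

Because the stored size is already a multiple of PAGE_SIZE, the left shift never discards significant bits, so the round trip through malloc_large_size() returns exactly the size that was accounted with uma_total_inc().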

View File

@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -78,6 +79,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
#include <vm/uma_dbg.h>
@@ -551,6 +554,52 @@ malloc_dbg(caddr_t *vap, size_t *sizep, struct malloc_type *mtp,
}
#endif
/*
* Handle large allocations and frees by using kmem_malloc directly.
*/
static inline bool
malloc_large_slab(uma_slab_t slab)
{
uintptr_t va;
va = (uintptr_t)slab;
return ((va & 1) != 0);
}
static inline size_t
malloc_large_size(uma_slab_t slab)
{
uintptr_t va;
va = (uintptr_t)slab;
return (va >> 1);
}
static caddr_t
malloc_large(size_t *size, struct domainset *policy, int flags)
{
vm_offset_t va;
size_t sz;
sz = roundup(*size, PAGE_SIZE);
va = kmem_malloc_domainset(policy, sz, flags);
if (va != 0) {
/* The low bit is unused for slab pointers. */
vsetzoneslab(va, NULL, (void *)((sz << 1) | 1));
uma_total_inc(sz);
*size = sz;
}
return ((caddr_t)va);
}
static void
free_large(void *addr, size_t size)
{
kmem_free((vm_offset_t)addr, size);
uma_total_dec(size);
}
/*
* malloc:
*
@@ -588,9 +637,7 @@ void *
size = zone->uz_size;
malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
} else {
size = roundup(size, PAGE_SIZE);
zone = NULL;
va = uma_large_malloc(size, flags);
va = malloc_large(&size, DOMAINSET_RR(), flags);
malloc_type_allocated(mtp, va == NULL ? 0 : size);
}
if (flags & M_WAITOK)
@@ -605,46 +652,27 @@ void *
}
static void *
malloc_domain(size_t size, struct malloc_type *mtp, int domain, int flags)
malloc_domain(size_t size, int *indxp, struct malloc_type *mtp, int domain,
int flags)
{
int indx;
caddr_t va;
uma_zone_t zone;
#if defined(DEBUG_REDZONE)
unsigned long osize = size;
#endif
#ifdef MALLOC_DEBUG
va = NULL;
if (malloc_dbg(&va, &size, mtp, flags) != 0)
return (va);
#endif
if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
if (size & KMEM_ZMASK)
size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
indx = kmemsize[size >> KMEM_ZSHIFT];
zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
KASSERT(size <= kmem_zmax && (flags & M_EXEC) == 0,
("malloc_domain: Called with bad flag / size combination."));
if (size & KMEM_ZMASK)
size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
indx = kmemsize[size >> KMEM_ZSHIFT];
zone = kmemzones[indx].kz_zone[mtp_get_subzone(mtp)];
#ifdef MALLOC_PROFILE
krequests[size >> KMEM_ZSHIFT]++;
krequests[size >> KMEM_ZSHIFT]++;
#endif
va = uma_zalloc_domain(zone, NULL, domain, flags);
if (va != NULL)
size = zone->uz_size;
malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx);
} else {
size = roundup(size, PAGE_SIZE);
zone = NULL;
va = uma_large_malloc_domain(size, domain, flags);
malloc_type_allocated(mtp, va == NULL ? 0 : size);
}
if (flags & M_WAITOK)
KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
else if (va == NULL)
t_malloc_fail = time_uptime;
#ifdef DEBUG_REDZONE
va = uma_zalloc_domain(zone, NULL, domain, flags);
if (va != NULL)
va = redzone_setup(va, osize);
#endif
size = zone->uz_size;
*indxp = indx;
return ((void *) va);
}
@@ -653,16 +681,39 @@ malloc_domainset(size_t size, struct malloc_type *mtp, struct domainset *ds,
int flags)
{
struct vm_domainset_iter di;
void *ret;
caddr_t ret;
int domain;
int indx;
vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
do {
ret = malloc_domain(size, mtp, domain, flags);
if (ret != NULL)
break;
} while (vm_domainset_iter_policy(&di, &domain) == 0);
#if defined(DEBUG_REDZONE)
unsigned long osize = size;
#endif
#ifdef MALLOC_DEBUG
ret = NULL;
if (malloc_dbg(&ret, &size, mtp, flags) != 0)
return (ret);
#endif
if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
do {
ret = malloc_domain(size, &indx, mtp, domain, flags);
} while (ret == NULL &&
vm_domainset_iter_policy(&di, &domain) == 0);
malloc_type_zone_allocated(mtp, ret == NULL ? 0 : size, indx);
} else {
/* Policy is handled by kmem. */
ret = malloc_large(&size, ds, flags);
malloc_type_allocated(mtp, ret == NULL ? 0 : size);
}
if (flags & M_WAITOK)
KASSERT(ret != NULL, ("malloc(M_WAITOK) returned NULL"));
else if (ret == NULL)
t_malloc_fail = time_uptime;
#ifdef DEBUG_REDZONE
if (ret != NULL)
ret = redzone_setup(ret, osize);
#endif
return (ret);
}
@@ -755,15 +806,15 @@ free(void *addr, struct malloc_type *mtp)
panic("free: address %p(%p) has not been allocated.\n",
addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
if (__predict_true(!malloc_large_slab(slab))) {
size = zone->uz_size;
#ifdef INVARIANTS
free_save_type(addr, mtp, size);
#endif
uma_zfree_arg(zone, addr, slab);
} else {
size = slab->us_size;
uma_large_free(slab);
size = malloc_large_size(slab);
free_large(addr, size);
}
malloc_type_freed(mtp, size);
}
@@ -789,15 +840,15 @@ free_domain(void *addr, struct malloc_type *mtp)
panic("free_domain: address %p(%p) has not been allocated.\n",
addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
if (__predict_true(!malloc_large_slab(slab))) {
size = zone->uz_size;
#ifdef INVARIANTS
free_save_type(addr, mtp, size);
#endif
uma_zfree_domain(zone, addr, slab);
} else {
size = slab->us_size;
uma_large_free(slab);
size = malloc_large_size(slab);
free_large(addr, size);
}
malloc_type_freed(mtp, size);
}
@@ -844,10 +895,10 @@ realloc(void *addr, size_t size, struct malloc_type *mtp, int flags)
("realloc: address %p out of range", (void *)addr));
/* Get the size of the original block */
if (!(slab->us_flags & UMA_SLAB_MALLOC))
if (!malloc_large_slab(slab))
alloc = zone->uz_size;
else
alloc = slab->us_size;
alloc = malloc_large_size(slab);
/* Reuse the original block if appropriate */
if (size <= alloc

View File

@@ -311,7 +311,7 @@ memguard_alloc(unsigned long req_size, int flags)
* When we pass our memory limit, reject sub-page allocations.
* Page-size and larger allocations will use the same amount
* of physical memory whether we allocate or hand off to
* uma_large_alloc(), so keep those.
* malloc_large(), so keep those.
*/
if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit &&
req_size < PAGE_SIZE) {

View File

@@ -615,7 +615,6 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
#define UMA_SLAB_KERNEL 0x04 /* Slab alloced from kmem */
#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */
#define UMA_SLAB_OFFP 0x10 /* Slab is managed separately */
#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */
/* 0x02, 0x40, and 0x80 are available */
/*

View File

@@ -149,10 +149,10 @@ static struct sx uma_reclaim_lock;
* kmem soft limit, initialized by uma_set_limit(). Ensure that early
* allocations don't trigger a wakeup of the reclaim thread.
*/
static unsigned long uma_kmem_limit = LONG_MAX;
unsigned long uma_kmem_limit = LONG_MAX;
SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_limit, CTLFLAG_RD, &uma_kmem_limit, 0,
"UMA kernel memory soft limit");
static unsigned long uma_kmem_total;
unsigned long uma_kmem_total;
SYSCTL_ULONG(_vm, OID_AUTO, uma_kmem_total, CTLFLAG_RD, &uma_kmem_total, 0,
"UMA kernel memory usage");
@@ -326,22 +326,6 @@ static int zone_warnings = 1;
SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
"Warn when UMA zones becomes full");
/* Adjust bytes under management by UMA. */
static inline void
uma_total_dec(unsigned long size)
{
atomic_subtract_long(&uma_kmem_total, size);
}
static inline void
uma_total_inc(unsigned long size)
{
if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
uma_reclaim_wakeup();
}
/*
* This routine checks to see whether or not it's safe to enable buckets.
*/
@@ -4083,57 +4067,6 @@ uma_zone_exhausted_nolock(uma_zone_t zone)
return (zone->uz_sleepers > 0);
}
void *
uma_large_malloc_domain(vm_size_t size, int domain, int wait)
{
struct domainset *policy;
vm_offset_t addr;
uma_slab_t slab;
if (domain != UMA_ANYDOMAIN) {
/* avoid allocs targeting empty domains */
if (VM_DOMAIN_EMPTY(domain))
domain = UMA_ANYDOMAIN;
}
slab = zone_alloc_item(slabzone, NULL, domain, wait);
if (slab == NULL)
return (NULL);
policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() :
DOMAINSET_FIXED(domain);
addr = kmem_malloc_domainset(policy, size, wait);
if (addr != 0) {
vsetzoneslab(addr, NULL, slab);
slab->us_data = (void *)addr;
slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
slab->us_size = size;
slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
pmap_kextract(addr)));
uma_total_inc(size);
} else {
zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
return ((void *)addr);
}
void *
uma_large_malloc(vm_size_t size, int wait)
{
return uma_large_malloc_domain(size, UMA_ANYDOMAIN, wait);
}
void
uma_large_free(uma_slab_t slab)
{
KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
("uma_large_free: Memory not allocated with uma_large_malloc."));
kmem_free((vm_offset_t)slab->us_data, slab->us_size);
uma_total_dec(slab->us_size);
zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
static void
uma_zero_item(void *item, uma_zone_t zone)
{

View File

@@ -281,10 +281,7 @@ BITSET_DEFINE(slabbits, SLAB_SETSIZE);
* store and subdivides it into individually allocatable items.
*/
struct uma_slab {
union {
LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */
unsigned long _us_size; /* Size of allocation */
} us_type;
LIST_ENTRY(uma_slab) us_link; /* slabs in zone */
SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
uint8_t *us_data; /* First item */
struct slabbits us_free; /* Free bitmask. */
@@ -296,9 +293,6 @@ struct uma_slab {
uint8_t us_domain; /* Backing NUMA domain. */
};
#define us_link us_type._us_link
#define us_size us_type._us_size
#if MAXMEMDOM >= 255
#error "Slab domain type insufficient"
#endif
@@ -402,9 +396,6 @@ struct uma_zone {
#ifdef _KERNEL
/* Internal prototypes */
static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
void *uma_large_malloc(vm_size_t size, int wait);
void *uma_large_malloc_domain(vm_size_t size, int domain, int wait);
void uma_large_free(uma_slab_t slab);
/* Lock Macros */
@@ -500,6 +491,25 @@ vsetzoneslab(vm_offset_t va, uma_zone_t zone, uma_slab_t slab)
p->plinks.uma.zone = zone;
}
extern unsigned long uma_kmem_limit;
extern unsigned long uma_kmem_total;
/* Adjust bytes under management by UMA. */
static inline void
uma_total_dec(unsigned long size)
{
atomic_subtract_long(&uma_kmem_total, size);
}
static inline void
uma_total_inc(unsigned long size)
{
if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
uma_reclaim_wakeup();
}
/*
* The following two functions may be defined by architecture specific code
* if they can provide more efficient allocation functions. This is useful