diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 9ef7e34c9a2c..3aa8d1a4f45d 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -239,16 +239,22 @@ sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
 {
 	u_long size;
 
-	size = vmem_size(kmem_arena, VMEM_ALLOC);
+	size = uma_size();
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
 static int
 sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
 {
-	u_long size;
+	u_long size, limit;
 
-	size = vmem_size(kmem_arena, VMEM_FREE);
+	/* The sysctl is unsigned, implement as a saturation value. */
+	size = uma_size();
+	limit = uma_limit();
+	if (size > limit)
+		size = 0;
+	else
+		size = limit - size;
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
@@ -669,19 +675,6 @@ reallocf(void *addr, unsigned long size, struct malloc_type *mtp, int flags)
 	return (mem);
 }
 
-/*
- * Wake the uma reclamation pagedaemon thread when we exhaust KVA. It
- * will call the lowmem handler and uma_reclaim() callbacks in a
- * context that is safe.
- */
-static void
-kmem_reclaim(vmem_t *vm, int flags)
-{
-
-	uma_reclaim_wakeup();
-	pagedaemon_wakeup();
-}
-
 #ifndef __sparc64__
 CTASSERT(VM_KMEM_SIZE_SCALE >= 1);
 #endif
@@ -759,9 +752,7 @@ kmeminit(void)
 #else
 	tmp = vm_kmem_size;
 #endif
-	vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
-	    0, 0);
-	vmem_set_reclaim(kmem_arena, kmem_reclaim);
+	uma_set_limit(tmp);
 
 #ifdef DEBUG_MEMGUARD
 	/*
@@ -769,7 +760,7 @@ kmeminit(void)
 	 * replacement allocator used for detecting tamper-after-free
 	 * scenarios as they occur. It is only used for debugging.
 	 */
-	memguard_init(kmem_arena);
+	memguard_init(kernel_arena);
 #endif
 }
 
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 4dd4fb8eff1e..5b88a3f69cac 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -137,6 +137,7 @@ struct vmem {
 	int			vm_nbusytag;
 	vmem_size_t		vm_inuse;
 	vmem_size_t		vm_size;
+	vmem_size_t		vm_limit;
 
 	/* Used on import. */
 	vmem_import_t		*vm_importfn;
@@ -228,11 +229,11 @@ static uma_zone_t vmem_bt_zone;
 
 /* boot time arena storage. */
 static struct vmem kernel_arena_storage;
-static struct vmem kmem_arena_storage;
 static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
+/* kernel and kmem arenas are aliased for backwards KPI compat. */
 vmem_t *kernel_arena = &kernel_arena_storage;
-vmem_t *kmem_arena = &kmem_arena_storage;
+vmem_t *kmem_arena = &kernel_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;
 
@@ -254,11 +255,11 @@ bt_fill(vmem_t *vm, int flags)
 	VMEM_ASSERT_LOCKED(vm);
 
 	/*
-	 * Only allow the kmem arena to dip into reserve tags. It is the
+	 * Only allow the kernel arena to dip into reserve tags. It is the
 	 * vmem where new tags come from.
 	 */
 	flags &= BT_FLAGS;
-	if (vm != kmem_arena)
+	if (vm != kernel_arena)
 		flags &= ~M_USE_RESERVE;
 
 	/*
@@ -615,22 +616,22 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 	vmem_addr_t addr;
 
-	*pflag = UMA_SLAB_KMEM;
+	*pflag = UMA_SLAB_KERNEL;
 
 	/*
 	 * Single thread boundary tag allocation so that the address space
 	 * and memory are added in one atomic operation.
 	 */
 	mtx_lock(&vmem_bt_lock);
-	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
+	if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
 	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
 	    &addr) == 0) {
-		if (kmem_back(kmem_object, addr, bytes,
+		if (kmem_back(kernel_object, addr, bytes,
 		    M_NOWAIT | M_USE_RESERVE) == 0) {
 			mtx_unlock(&vmem_bt_lock);
 			return ((void *)addr);
 		}
-		vmem_xfree(kmem_arena, addr, bytes);
+		vmem_xfree(kernel_arena, addr, bytes);
 		mtx_unlock(&vmem_bt_lock);
 		/*
 		 * Out of memory, not address space. This may not even be
@@ -835,7 +836,7 @@ vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
 	int error;
 
 	if (vm->vm_importfn == NULL)
-		return EINVAL;
+		return (EINVAL);
 
 	/*
 	 * To make sure we get a span that meets the alignment we double it
@@ -845,6 +846,9 @@ vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
 		size = (align * 2) + size;
 	size = roundup(size, vm->vm_import_quantum);
 
+	if (vm->vm_limit != 0 && vm->vm_limit < vm->vm_size + size)
+		return (ENOMEM);
+
 	/*
 	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
 	 * span and the tag we want to allocate from it.
@@ -856,7 +860,7 @@ vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
 	VMEM_LOCK(vm);
 	vm->vm_nfreetags += BT_MAXALLOC;
 	if (error)
-		return ENOMEM;
+		return (ENOMEM);
 
 	vmem_add1(vm, addr, size, BT_TYPE_SPAN);
 
@@ -977,6 +981,15 @@ vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
 	VMEM_UNLOCK(vm);
 }
 
+void
+vmem_set_limit(vmem_t *vm, vmem_size_t limit)
+{
+
+	VMEM_LOCK(vm);
+	vm->vm_limit = limit;
+	VMEM_UNLOCK(vm);
+}
+
 void
 vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
 {
@@ -1009,6 +1022,7 @@ vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
 	vm->vm_quantum_shift = flsl(quantum) - 1;
 	vm->vm_nbusytag = 0;
 	vm->vm_size = 0;
+	vm->vm_limit = 0;
 	vm->vm_inuse = 0;
 	qc_init(vm, qcache_max);
 
diff --git a/sys/sys/vmem.h b/sys/sys/vmem.h
index ca8426733e3b..e9dc54d154cb 100644
--- a/sys/sys/vmem.h
+++ b/sys/sys/vmem.h
@@ -75,6 +75,12 @@ void vmem_destroy(vmem_t *);
 void vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
     vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum);
 
+/*
+ * Set a limit on the total size of a vmem.
+ */
+
+void vmem_set_limit(vmem_t *vm, vmem_size_t limit);
+
 /*
  * Set a callback for reclaiming memory when space is exhausted:
  */
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c
index 4559ff08c5cf..be926a3dc842 100644
--- a/sys/vm/memguard.c
+++ b/sys/vm/memguard.c
@@ -66,7 +66,7 @@ __FBSDID("$FreeBSD$");
 
 static SYSCTL_NODE(_vm, OID_AUTO, memguard, CTLFLAG_RW, NULL, "MemGuard data");
 /*
- * The vm_memguard_divisor variable controls how much of kmem_map should be
+ * The vm_memguard_divisor variable controls how much of kernel_arena should be
  * reserved for MemGuard.
 */
 static u_int vm_memguard_divisor;
@@ -157,7 +157,7 @@ SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD,
 
 /*
  * Return a fudged value to be used for vm_kmem_size for allocating
- * the kmem_map. The memguard memory will be a submap.
+ * the kernel_arena. The memguard memory will be a submap.
  */
 unsigned long
 memguard_fudge(unsigned long km_size, const struct vm_map *parent_map)
@@ -348,7 +348,7 @@ memguard_alloc(unsigned long req_size, int flags)
 	addr = origaddr;
 	if (do_guard)
 		addr += PAGE_SIZE;
-	rv = kmem_back(kmem_object, addr, size_p, flags);
+	rv = kmem_back(kernel_object, addr, size_p, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_xfree(memguard_arena, origaddr, size_v);
 		memguard_fail_pgs++;
@@ -418,7 +418,7 @@ memguard_free(void *ptr)
 	 * vm_map lock to serialize updates to memguard_wasted, since
 	 * we had the lock at increment.
 	 */
-	kmem_unback(kmem_object, addr, size);
+	kmem_unback(kernel_object, addr, size);
 	if (sizev > size)
 		addr -= PAGE_SIZE;
 	vmem_xfree(memguard_arena, addr, sizev);
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 09bb1341ec4a..1ea8ee06245b 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -609,12 +609,11 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
  * These flags are setable in the allocf and visible in the freef.
  */
 #define UMA_SLAB_BOOT	0x01		/* Slab alloced from boot pages */
-#define UMA_SLAB_KMEM	0x02		/* Slab alloced from kmem_map */
 #define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kernel_map */
 #define UMA_SLAB_PRIV	0x08		/* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP	0x10		/* Slab is managed separately */
 #define UMA_SLAB_MALLOC	0x20		/* Slab is a large malloc slab */
-/* 0x40 and 0x80 are available */
+/* 0x02, 0x40 and 0x80 are available */
 
 /*
  * Used to pre-fill a zone with some number of items
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index aac2fbcf2a47..17a7cfd9fdfc 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -147,6 +147,10 @@ static struct mtx uma_boot_pages_mtx;
 
 static struct sx uma_drain_lock;
 
+/* kmem soft limit. */
+static unsigned long uma_kmem_limit;
+static volatile unsigned long uma_kmem_total;
+
 /* Is the VM done starting up? */
 static int booted = 0;
 #define	UMA_STARTUP	1
@@ -285,6 +289,22 @@ static int zone_warnings = 1;
 SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
     "Warn when UMA zones becomes full");
 
+/* Adjust bytes under management by UMA. */
+static inline void
+uma_total_dec(unsigned long size)
+{
+
+	atomic_subtract_long(&uma_kmem_total, size);
+}
+
+static inline void
+uma_total_inc(unsigned long size)
+{
+
+	if (atomic_fetchadd_long(&uma_kmem_total, size) > uma_kmem_limit)
+		uma_reclaim_wakeup();
+}
+
 /*
  * This routine checks to see whether or not it's safe to enable buckets.
  */
@@ -831,6 +851,7 @@ keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
+	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }
 
 /*
@@ -935,6 +956,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
 {
 	uma_alloc allocf;
 	uma_slab_t slab;
+	unsigned long size;
 	uint8_t *mem;
 	uint8_t flags;
 	int i;
@@ -945,6 +967,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
 	allocf = keg->uk_allocf;
 	KEG_UNLOCK(keg);
 
+	size = keg->uk_ppera * PAGE_SIZE;
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
 		if (slab == NULL)
@@ -968,13 +991,14 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
 		wait |= M_NODUMP;
 
 	/* zone is passed for legacy reasons. */
-	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
+	mem = allocf(zone, size, &flags, wait);
 	if (mem == NULL) {
 		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
 		slab = NULL;
 		goto out;
 	}
+	uma_total_inc(size);
 
 	/* Point the slab into the allocated memory */
 	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
@@ -1079,8 +1103,8 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
 {
 	void *p;	/* Returned page */
 
-	*pflag = UMA_SLAB_KMEM;
-	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
+	*pflag = UMA_SLAB_KERNEL;
+	p = (void *) kmem_malloc(kernel_arena, bytes, wait);
 
 	return (p);
 }
@@ -1161,9 +1185,7 @@ page_free(void *mem, vm_size_t size, uint8_t flags)
 {
 	struct vmem *vmem;
 
-	if (flags & UMA_SLAB_KMEM)
-		vmem = kmem_arena;
-	else if (flags & UMA_SLAB_KERNEL)
+	if (flags & UMA_SLAB_KERNEL)
 		vmem = kernel_arena;
 	else
 		panic("UMA: page_free used with invalid flags %x", flags);
@@ -3132,31 +3154,33 @@ uma_reclaim(void)
 	sx_xunlock(&uma_drain_lock);
 }
 
-static int uma_reclaim_needed;
+static volatile int uma_reclaim_needed;
 
 void
 uma_reclaim_wakeup(void)
 {
 
-	uma_reclaim_needed = 1;
-	wakeup(&uma_reclaim_needed);
+	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) == 0)
+		wakeup(uma_reclaim);
 }
 
 void
 uma_reclaim_worker(void *arg __unused)
 {
 
-	sx_xlock(&uma_drain_lock);
 	for (;;) {
-		sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
-		    "umarcl", 0);
-		if (uma_reclaim_needed) {
-			uma_reclaim_needed = 0;
-			sx_xunlock(&uma_drain_lock);
-			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
-			sx_xlock(&uma_drain_lock);
-			uma_reclaim_locked(true);
-		}
+		sx_xlock(&uma_drain_lock);
+		while (uma_reclaim_needed == 0)
+			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
+			    hz);
+		sx_xunlock(&uma_drain_lock);
+		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
+		sx_xlock(&uma_drain_lock);
+		uma_reclaim_locked(true);
+		atomic_set_int(&uma_reclaim_needed, 0);
+		sx_xunlock(&uma_drain_lock);
+		/* Don't fire more than once per-second. */
+		pause("umarclslp", hz);
 	}
 }
 
@@ -3194,6 +3218,7 @@ uma_large_malloc(vm_size_t size, int wait)
 		slab->us_data = mem;
 		slab->us_flags = flags | UMA_SLAB_MALLOC;
 		slab->us_size = size;
+		uma_total_inc(size);
 	} else {
 		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 	}
@@ -3206,6 +3231,7 @@ uma_large_free(uma_slab_t slab)
 {
 
 	page_free(slab->us_data, slab->us_size, slab->us_flags);
+	uma_total_dec(slab->us_size);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
 
@@ -3221,6 +3247,27 @@ uma_zero_item(void *item, uma_zone_t zone)
 		bzero(item, zone->uz_size);
 }
 
+unsigned long
+uma_limit(void)
+{
+
+	return (uma_kmem_limit);
+}
+
+void
+uma_set_limit(unsigned long limit)
+{
+
+	uma_kmem_limit = limit;
+}
+
+unsigned long
+uma_size(void)
+{
+
+	return uma_kmem_total;
+}
+
 void
 uma_print_stats(void)
 {
diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h
index e2b543da42a4..158c2383c885 100644
--- a/sys/vm/uma_int.h
+++ b/sys/vm/uma_int.h
@@ -425,6 +425,13 @@ vsetslab(vm_offset_t va, uma_slab_t slab)
 void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag,
     int wait);
 void uma_small_free(void *mem, vm_size_t size, uint8_t flags);
+
+/* Set a global soft limit on UMA managed memory. */
+void uma_set_limit(unsigned long limit);
+unsigned long uma_limit(void);
+
+/* Return the amount of memory managed by UMA. */
+unsigned long uma_size(void);
 #endif /* _KERNEL */
 
 #endif /* VM_UMA_INT_H */
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 8eb851024459..2931f8085ca5 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -164,11 +164,13 @@ vm_offset_t
 kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
-	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
+	vm_object_t object = kernel_object;
 	vm_offset_t addr, i, offset;
 	vm_page_t m;
 	int pflags, tries;
 
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_attr: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
 		return (0);
@@ -220,12 +222,14 @@ kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
-	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
+	vm_object_t object = kernel_object;
 	vm_offset_t addr, offset, tmp;
 	vm_page_t end_m, m;
 	u_long npages;
 	int pflags, tries;
 
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_alloc_contig: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
@@ -314,12 +318,13 @@ kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
 	vm_offset_t addr;
 	int rv;
 
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_malloc: Only kernel_arena is supported."));
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
 		return (0);
 
-	rv = kmem_back((vmem == kmem_arena) ? kmem_object : kernel_object,
-	    addr, size, flags);
+	rv = kmem_back(kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
 		vmem_free(vmem, addr, size);
 		return (0);
@@ -339,8 +344,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
 	vm_page_t m, mpred;
 	int pflags;
 
-	KASSERT(object == kmem_object || object == kernel_object,
-	    ("kmem_back: only supports kernel objects."));
+	KASSERT(object == kernel_object,
+	    ("kmem_back: only supports kernel object."));
 
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -396,8 +401,8 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 	vm_page_t m, next;
 	vm_offset_t end, offset;
 
-	KASSERT(object == kmem_object || object == kernel_object,
-	    ("kmem_unback: only supports kernel objects."));
+	KASSERT(object == kernel_object,
+	    ("kmem_unback: only supports kernel object."));
 
 	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -422,9 +427,10 @@ void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
 
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_free: Only kernel_arena is supported."));
 	size = round_page(size);
-	kmem_unback((vmem == kmem_arena) ? kmem_object : kernel_object,
-	    addr, size);
+	kmem_unback(kernel_object, addr, size);
 	vmem_free(vmem, addr, size);
 }
 
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 15c4b7ea6a3d..905ac1b93138 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1189,9 +1189,9 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 	vm_inherit_t inheritance;
 
 	VM_MAP_ASSERT_LOCKED(map);
-	KASSERT((object != kmem_object && object != kernel_object) ||
+	KASSERT(object != kernel_object ||
 	    (cow & MAP_COPY_ON_WRITE) == 0,
-	    ("vm_map_insert: kmem or kernel object and COW"));
+	    ("vm_map_insert: kernel object and COW"));
 	KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
 	    ("vm_map_insert: paradoxical MAP_NOFAULT request"));
 	KASSERT((prot & ~max) == 0,
@@ -2990,7 +2990,7 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 		VM_OBJECT_WLOCK(object);
 		if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
 		    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
-		    object == kernel_object || object == kmem_object)) {
+		    object == kernel_object)) {
 			vm_object_collapse(object);
 
 			/*
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 08827dfd3508..d1ba8b989256 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -144,7 +144,6 @@ struct object_q vm_object_list;
 struct mtx vm_object_list_mtx;	/* lock for object list and count */
 
 struct vm_object kernel_object_store;
-struct vm_object kmem_object_store;
 
 static SYSCTL_NODE(_vm_stats, OID_AUTO, object, CTLFLAG_RD, 0,
     "VM object stats");
@@ -294,14 +293,6 @@ vm_object_init(void)
 	kernel_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
 #endif
 
-	rw_init(&kmem_object->lock, "kmem vm object");
-	_vm_object_allocate(OBJT_PHYS, atop(VM_MAX_KERNEL_ADDRESS -
-	    VM_MIN_KERNEL_ADDRESS), kmem_object);
-#if VM_NRESERVLEVEL > 0
-	kmem_object->flags |= OBJ_COLORED;
-	kmem_object->pg_color = (u_short)atop(VM_MIN_KERNEL_ADDRESS);
-#endif
-
 	/*
 	 * The lock portion of struct vm_object must be type stable due
 	 * to vm_pageout_fallback_object_lock locking a vm object
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index d51158f828e8..4aa7ede14659 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -227,10 +227,10 @@ extern struct object_q vm_object_list;	/* list of allocated objects */
 extern struct mtx vm_object_list_mtx;	/* lock for object list and count */
 
 extern struct vm_object kernel_object_store;
-extern struct vm_object kmem_object_store;
 
+/* kernel and kmem are aliased for backwards KPI compat. */
 #define	kernel_object	(&kernel_object_store)
-#define	kmem_object	(&kmem_object_store)
+#define	kmem_object	(&kernel_object_store)
 
 #define	VM_OBJECT_ASSERT_LOCKED(object)	\
 	rw_assert(&(object)->lock, RA_LOCKED)
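
The following is not part of the patch. It is a minimal userland sketch of the saturating arithmetic that the new sysctl_kmem_map_free() handler above performs with uma_size() and uma_limit(); the kmem_total and kmem_limit variables and their sample values are hypothetical stand-ins used only to show that the reported free space clamps to zero rather than wrapping when the UMA total overshoots the soft limit.

#include <stdio.h>

/* Hypothetical stand-ins for uma_size() and uma_limit(). */
static unsigned long kmem_total = 512UL << 20;	/* bytes currently managed by UMA */
static unsigned long kmem_limit = 384UL << 20;	/* configured soft limit */

/*
 * Mirror of the saturation logic in sysctl_kmem_map_free(): the sysctl is
 * unsigned, so report zero free space instead of wrapping around when the
 * total exceeds the limit.
 */
static unsigned long
kmem_map_free(void)
{
	unsigned long size, limit;

	size = kmem_total;
	limit = kmem_limit;
	if (size > limit)
		return (0);
	return (limit - size);
}

int
main(void)
{

	printf("vm.kmem_map_free would report %lu bytes\n", kmem_map_free());
	return (0);
}

The clamp matters because the limit set by uma_set_limit() is a soft limit: uma_total_inc() only wakes the reclaim worker when the total exceeds it, so uma_kmem_total can temporarily run past uma_kmem_limit without the unsigned sysctl underflowing.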