 - Add a general-purpose resource allocator, vmem, from NetBSD.  It was
   originally inspired by the Solaris vmem described in the proceedings
   of USENIX 2001.  The NetBSD version was heavily refactored to fix bugs
   and simplify the code.
 - Use this resource allocator to allocate the buffer and transient maps.
   Buffer cache defragmentation passes are reduced by 25% when used by
   filesystems with mixed block sizes.  Ultimately this may permit dynamic
   buffer cache sizing on low-KVA machines.
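   As a quick illustration of the consumer-facing API (a sketch only: the
   arena name, base, and sizes here are invented; the declarations live in
   the new sys/sys/vmem.h below):

    #include <sys/param.h>
    #include <sys/malloc.h>
    #include <sys/vmem.h>

    vmem_t *arena;
    vmem_addr_t addr;

    /* Manage the integer range [0x100000, 0x100000 + 1MB) in page units. */
    arena = vmem_create("example arena", 0x100000, 1024 * 1024,
        PAGE_SIZE, 0, M_WAITOK);

    /* vmem hands back a number from the range, not backed memory. */
    if (vmem_alloc(arena, 4 * PAGE_SIZE, M_BESTFIT | M_NOWAIT, &addr) == 0)
        vmem_free(arena, addr, 4 * PAGE_SIZE);

    vmem_destroy(arena);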

Discussed with:	alc, kib, attilio
Tested by:	pho
Sponsored by:	EMC / Isilon Storage Division
commit 5f51836645 (parent 837610eb04)
Author: Jeff Roberson
Date:   2013-06-28 03:51:20 +00:00
Notes:  svn2git (2020-12-20 02:59:44 +00:00): svn path=/head/; revision=252330

13 changed files with 1550 additions and 67 deletions

sys/conf/files

@@ -2797,6 +2797,7 @@ kern/subr_trap.c standard
 kern/subr_turnstile.c standard
 kern/subr_uio.c standard
 kern/subr_unit.c standard
+kern/subr_vmem.c standard
 kern/subr_witness.c optional witness
 kern/sys_capability.c standard
 kern/sys_generic.c standard

sys/geom/geom_io.c

@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/stack.h>
 #include <sys/sysctl.h>
+#include <sys/vmem.h>

 #include <sys/errno.h>
 #include <geom/geom.h>
@@ -626,7 +627,6 @@ g_io_transient_map_bio(struct bio *bp)
         vm_offset_t addr;
         long size;
         u_int retried;
-        int rv;

         KASSERT(unmapped_buf_allowed, ("unmapped disabled"));
@@ -636,10 +636,7 @@ g_io_transient_map_bio(struct bio *bp)
         retried = 0;
         atomic_add_long(&transient_maps, 1);
 retry:
-        vm_map_lock(bio_transient_map);
-        if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map),
-            size, &addr)) {
-                vm_map_unlock(bio_transient_map);
+        if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
                 if (transient_map_retries != 0 &&
                     retried >= transient_map_retries) {
                         g_io_deliver(bp, EDEADLK/* XXXKIB */);
@@ -651,7 +648,7 @@ g_io_transient_map_bio(struct bio *bp)
                 /*
                  * Naive attempt to quiesce the I/O to get more
                  * in-flight requests completed and defragment
-                 * the bio_transient_map.
+                 * the transient_arena.
                  */
                 CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d",
                     bp, bp->bio_to->name, retried);
@@ -661,12 +658,6 @@ g_io_transient_map_bio(struct bio *bp)
                         goto retry;
                 }
         }
-        rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size,
-            VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
-        KASSERT(rv == KERN_SUCCESS,
-            ("vm_map_insert(bio_transient_map) rv %d %jx %lx",
-            rv, (uintmax_t)addr, size));
-        vm_map_unlock(bio_transient_map);
         atomic_add_int(&inflight_transient_maps, 1);
         pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size));
         bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;

sys/kern/subr_vmem.c (new file, 1372 lines; diff suppressed because it is too large)

sys/kern/vfs_bio.c

@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
+#include <sys/vmem.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <geom/geom.h>
@@ -920,13 +921,13 @@ bfreekva(struct buf *bp)
         atomic_subtract_long(&bufspace, bp->b_kvasize);
         if ((bp->b_flags & B_UNMAPPED) == 0) {
                 BUF_CHECK_MAPPED(bp);
-                vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvabase,
-                    (vm_offset_t)bp->b_kvabase + bp->b_kvasize);
+                vmem_free(buffer_arena, (vm_offset_t)bp->b_kvabase,
+                    bp->b_kvasize);
         } else {
                 BUF_CHECK_UNMAPPED(bp);
                 if ((bp->b_flags & B_KVAALLOC) != 0) {
-                        vm_map_remove(buffer_map, (vm_offset_t)bp->b_kvaalloc,
-                            (vm_offset_t)bp->b_kvaalloc + bp->b_kvasize);
+                        vmem_free(buffer_arena, (vm_offset_t)bp->b_kvaalloc,
+                            bp->b_kvasize);
                 }
                 atomic_subtract_long(&unmapped_bufspace, bp->b_kvasize);
                 bp->b_flags &= ~(B_UNMAPPED | B_KVAALLOC);
@@ -2019,15 +2020,11 @@ static int
 allocbufkva(struct buf *bp, int maxsize, int gbflags)
 {
         vm_offset_t addr;
-        int rv;

         bfreekva(bp);
         addr = 0;

-        vm_map_lock(buffer_map);
-        if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize,
-            &addr)) {
-                vm_map_unlock(buffer_map);
+        if (vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT, &addr)) {
                 /*
                  * Buffer map is too fragmented.  Request the caller
                  * to defragment the map.
@@ -2035,10 +2032,6 @@ allocbufkva(struct buf *bp, int maxsize, int gbflags)
                 atomic_add_int(&bufdefragcnt, 1);
                 return (1);
         }
-        rv = vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize,
-            VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
-        KASSERT(rv == KERN_SUCCESS, ("vm_map_insert(buffer_map) rv %d", rv));
-        vm_map_unlock(buffer_map);
         setbufkva(bp, addr, maxsize, gbflags);
         atomic_add_long(&bufspace, bp->b_kvasize);
         return (0);
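
Read together, the two hunks above leave allocbufkva() looking roughly like
this (reassembled from the diff for readability; not authoritative source):

    static int
    allocbufkva(struct buf *bp, int maxsize, int gbflags)
    {
            vm_offset_t addr;

            bfreekva(bp);
            addr = 0;

            if (vmem_alloc(buffer_arena, maxsize, M_BESTFIT | M_NOWAIT,
                &addr)) {
                    /* buffer_arena is too fragmented; ask caller to defrag. */
                    atomic_add_int(&bufdefragcnt, 1);
                    return (1);
            }
            setbufkva(bp, addr, maxsize, gbflags);
            atomic_add_long(&bufspace, bp->b_kvasize);
            return (0);
    }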
@@ -2389,7 +2382,7 @@ getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata)
  *	We block if:
  *	We have insufficient buffer headers
  *	We have insufficient buffer space
- *	buffer_map is too fragmented ( space reservation fails )
+ *	buffer_arena is too fragmented ( space reservation fails )
  *	If we have to flush dirty buffers ( but we try to avoid this )
  */
 static struct buf *
@@ -3593,7 +3586,7 @@ biodone(struct bio *bp)
                 done(bp);
         if (transient) {
                 pmap_qremove(start, OFF_TO_IDX(end - start));
-                vm_map_remove(bio_transient_map, start, end);
+                vmem_free(transient_arena, start, end - start);
                 atomic_add_int(&inflight_transient_maps, -1);
         }
 }

sys/sys/malloc.h

@@ -51,6 +51,8 @@
 #define M_NOVM          0x0200  /* don't ask VM for pages */
 #define M_USE_RESERVE   0x0400  /* can alloc out of reserve memory */
 #define M_NODUMP        0x0800  /* don't dump pages in this allocation */
+#define M_FIRSTFIT      0x1000  /* Only for vmem, fast fit. */
+#define M_BESTFIT       0x2000  /* Only for vmem, low fragmentation. */

 #define M_MAGIC         877983977       /* time when first defined :-) */
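
The fit policy is chosen per allocation rather than per arena; a hedged
sketch of the trade-off (arena, size, and addr assumed declared elsewhere):

    /* Fast path: take the first free segment large enough. */
    error = vmem_alloc(arena, size, M_FIRSTFIT | M_NOWAIT, &addr);

    /* Low fragmentation: search for the closest-fitting free segment. */
    error = vmem_alloc(arena, size, M_BESTFIT | M_NOWAIT, &addr);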

sys/sys/vmem.h (new file, 135 lines)

@@ -0,0 +1,135 @@
/*-
* Copyright (c)2006 YAMAMOTO Takashi,
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* From $NetBSD: vmem.h,v 1.20 2013/01/29 21:26:24 para Exp $ */
/* $FreeBSD$ */
#ifndef _SYS_VMEM_H_
#define _SYS_VMEM_H_
#include <sys/types.h>
#ifdef _KERNEL
typedef struct vmem vmem_t;
typedef uintptr_t vmem_addr_t;
typedef size_t vmem_size_t;
#define VMEM_ADDR_MIN 0
#define VMEM_ADDR_MAX (~(vmem_addr_t)0)
typedef int (vmem_import_t)(void *, vmem_size_t, int, vmem_addr_t *);
typedef void (vmem_release_t)(void *, vmem_addr_t, vmem_size_t);
typedef void (vmem_reclaim_t)(vmem_t *, int);
/*
* Create a vmem:
* name - Name of the region
* base - Initial span start (optional)
* size - Initial span size
* quantum - Natural unit of allocation (i.e., PAGE_SIZE, 1, etc.)
* qcache_max - Maximum size to quantum cache. This creates a UMA
* cache for each multiple of quantum up to qcache_max.
* flags - M_* flags
*/
vmem_t *vmem_create(const char *name, vmem_addr_t base,
vmem_size_t size, vmem_size_t quantum, vmem_size_t qcache_max, int flags);
vmem_t *vmem_init(vmem_t *vm, const char *name, vmem_addr_t base,
vmem_size_t size, vmem_size_t quantum, vmem_size_t qcache_max, int flags);
void vmem_destroy(vmem_t *);
/*
* Set callbacks for bringing in dynamic regions:
* importfn - Backing store import routine.
* releasefn - Backing store release routine.
* arg - Backing store argument
* import_quantum - Size to import from backing store
*/
void vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum);
/*
* Set a callback for reclaiming memory when space is exhausted:
*/
void vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn);
/*
* Allocate and free linear regions from a vmem. Must specify
* BESTFIT or FIRSTFIT. Free is non-blocking. These routines
* respect the quantum caches.
*/
int vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp);
void vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size);
/*
* Constrained allocate and free routines. These bypass the quantum cache.
* size - Size in units of 1, not quantum.
* align - Required alignment of the start of region
* phase - Offset from alignment
* nocross - Illegal boundary
* minaddr - Minimum allowed address for first byte
* maxaddr - Maximum allowed address for last byte
* flags - M_* flags
* addrp - result
*/
int vmem_xalloc(vmem_t *vm, vmem_size_t size, vmem_size_t align,
vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
vmem_addr_t maxaddr, int flags, vmem_addr_t *addrp);
void vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size);
/*
* Add a static region to a vmem after creation.  This won't be freed
* until the vmem is destroyed.
*/
int vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags);
/*
* Round up the given size to the vmem's native quantum size.
*/
vmem_size_t vmem_roundup_size(vmem_t *vm, vmem_size_t size);
/*
* Report vmem utilization according to the requested type.
*/
vmem_size_t vmem_size(vmem_t *vm, int typemask);
void vmem_whatis(vmem_addr_t addr, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_print(vmem_addr_t addr, const char *, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_printall(const char *, int (*fn)(const char *, ...)
__printflike(1, 2));
void vmem_startup(void);
/* vmem_size typemask */
#define VMEM_ALLOC 0x01
#define VMEM_FREE 0x02
#endif /* _KERNEL */
#endif /* !_SYS_VMEM_H_ */
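
A sketch of how the import callbacks and the constrained allocator above
compose; kva_import, kva_release, parent, and child are hypothetical names,
and only the vmem_* calls come from this header:

    #include <sys/param.h>
    #include <sys/malloc.h>
    #include <sys/vmem.h>

    /* Import callback: grow the child arena from a parent arena on demand. */
    static int
    kva_import(void *arg, vmem_size_t size, int flags, vmem_addr_t *addrp)
    {
            /* A fit flag is still required for the nested allocation. */
            return (vmem_alloc((vmem_t *)arg, size, M_BESTFIT | flags, addrp));
    }

    /* Release callback: hand imported spans back to the parent. */
    static void
    kva_release(void *arg, vmem_addr_t addr, vmem_size_t size)
    {
            vmem_free((vmem_t *)arg, addr, size);
    }

    static void
    example(vmem_t *parent, vmem_t *child)
    {
            vmem_addr_t addr;

            /* Refill the child from the parent in 1024-page spans. */
            vmem_set_import(child, kva_import, kva_release, parent,
                1024 * PAGE_SIZE);

            /* 8 pages, page-aligned, must not cross a 4MB boundary. */
            if (vmem_xalloc(child, 8 * PAGE_SIZE, PAGE_SIZE, 0,
                4 * 1024 * 1024, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
                M_BESTFIT | M_NOWAIT, &addr) == 0)
                    vmem_xfree(child, addr, 8 * PAGE_SIZE);
    }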

sys/vm/vm.h

@@ -134,10 +134,6 @@ struct kva_md_info {
         vm_offset_t buffer_eva;
         vm_offset_t clean_sva;
         vm_offset_t clean_eva;
-        vm_offset_t pager_sva;
-        vm_offset_t pager_eva;
-        vm_offset_t bio_transient_sva;
-        vm_offset_t bio_transient_eva;
 };

 extern struct kva_md_info kmi;

sys/vm/vm_init.c

@@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pipe.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
+#include <sys/vmem.h>

 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -120,6 +121,7 @@ vm_mem_init(dummy)
         /*
          * Initialize other VM packages
          */
+        vmem_startup();
         vm_object_init();
         vm_map_startup();
         kmem_init(virtual_avail, virtual_end);
@@ -183,29 +185,31 @@ vm_ksubmap_init(struct kva_md_info *kmi)
         if ((vm_size_t)((char *)v - firstaddr) != size)
                 panic("startup: table size inconsistency");

+        size = (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS +
+            (long)bio_transient_maxcnt * MAXPHYS;
         clean_map = kmem_suballoc(kernel_map, &kmi->clean_sva, &kmi->clean_eva,
-            (long)nbuf * BKVASIZE + (long)nswbuf * MAXPHYS +
-            (long)bio_transient_maxcnt * MAXPHYS, TRUE);
-        buffer_map = kmem_suballoc(clean_map, &kmi->buffer_sva,
-            &kmi->buffer_eva, (long)nbuf * BKVASIZE, FALSE);
-        buffer_map->system_map = 1;
+            size, TRUE);
+        size = (long)nbuf * BKVASIZE;
+        kmi->buffer_sva = kmem_alloc_nofault(clean_map, size);
+        kmi->buffer_eva = kmi->buffer_sva + size;
+        vmem_init(buffer_arena, "buffer arena", kmi->buffer_sva, size,
+            PAGE_SIZE, 0, 0);
+        size = (long)nswbuf * MAXPHYS;
+        swapbkva = kmem_alloc_nofault(clean_map, size);
+        if (!swapbkva)
+                panic("Not enough clean_map VM space for pager buffers");
         if (bio_transient_maxcnt != 0) {
-                bio_transient_map = kmem_suballoc(clean_map,
-                    &kmi->bio_transient_sva, &kmi->bio_transient_eva,
-                    (long)bio_transient_maxcnt * MAXPHYS, FALSE);
-                bio_transient_map->system_map = 1;
+                size = (long)bio_transient_maxcnt * MAXPHYS;
+                vmem_init(transient_arena, "transient arena",
+                    kmem_alloc_nofault(clean_map, size),
+                    size, PAGE_SIZE, 0, 0);
         }
-        pager_map = kmem_suballoc(clean_map, &kmi->pager_sva, &kmi->pager_eva,
-            (long)nswbuf * MAXPHYS, FALSE);
-        pager_map->system_map = 1;
         exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
             exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE);
         pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
             FALSE);

         /*
          * XXX: Mbuf system machine-specific initializations should
          *      go here, if anywhere.
          */
 }

sys/vm/vm_kern.c

@@ -89,8 +89,6 @@ vm_map_t kernel_map;
 vm_map_t kmem_map;
 vm_map_t exec_map;
 vm_map_t pipe_map;
-vm_map_t buffer_map;
-vm_map_t bio_transient_map;

 const void *zero_region;
 CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

sys/vm/vm_kern.h

@@ -64,11 +64,13 @@
 #define _VM_VM_KERN_H_ 1

 /* Kernel memory management definitions. */
-extern vm_map_t buffer_map;
 extern vm_map_t kernel_map;
 extern vm_map_t kmem_map;
 extern vm_map_t exec_map;
 extern vm_map_t pipe_map;
+extern struct vmem *buffer_arena;
+extern struct vmem *transient_arena;
+extern vm_offset_t swapbkva;
 extern u_long vm_kmem_size;

 #endif /* _VM_VM_KERN_H_ */

sys/vm/vm_object.c

@@ -2231,12 +2231,6 @@ vm_object_in_map(vm_object_t object)
         /* sx_sunlock(&allproc_lock); */
         if (_vm_object_in_map(kernel_map, object, 0))
                 return 1;
-        if (_vm_object_in_map(kmem_map, object, 0))
-                return 1;
-        if (_vm_object_in_map(pager_map, object, 0))
-                return 1;
-        if (_vm_object_in_map(buffer_map, object, 0))
-                return 1;
         return 0;
 }

sys/vm/vm_pager.c

@@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
@@ -174,11 +175,10 @@ static const int npagers = sizeof(pagertab) / sizeof(pagertab[0]);
  * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
  * (MAXPHYS == 64k) if you want to get the most efficiency.
  */
-vm_map_t pager_map;
-static int bswneeded;
-static vm_offset_t swapbkva;    /* swap buffers kva */
-struct mtx pbuf_mtx;
+struct mtx_padalign pbuf_mtx;
 static TAILQ_HEAD(swqueue, buf) bswlist;
+static int bswneeded;
+vm_offset_t swapbkva;           /* swap buffers kva */

 void
 vm_pager_init()
@@ -215,10 +215,6 @@ vm_pager_bufferinit()
         cluster_pbuf_freecnt = nswbuf / 2;
         vnode_pbuf_freecnt = nswbuf / 2 + 1;

-        swapbkva = kmem_alloc_nofault(pager_map, nswbuf * MAXPHYS);
-        if (!swapbkva)
-                panic("Not enough pager_map VM space for physical buffers");
 }

 /*

sys/vm/vm_pager.h

@@ -95,9 +95,8 @@ extern struct pagerops mgtdevicepagerops;

 #ifdef _KERNEL
-extern vm_map_t pager_map;
 extern struct pagerops *pagertab[];
-extern struct mtx pbuf_mtx;
+extern struct mtx_padalign pbuf_mtx;

 vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t,
     vm_ooffset_t, struct ucred *);