Allocate pager bufs from UMA instead of 80-ish mutex protected linked list.
o In vm_pager_bufferinit() create pbuf_zone and start accounting for how many pbufs we are going to have set. In the various subsystems that are going to utilize pbufs, create private zones via a call to pbuf_zsecond_create(). The latter calls uma_zsecond_create() and sets a limit on the created zone. After startup, preallocate pbufs according to the requirements of all pbuf zones. Subsystems that used to have a private limit with the old allocator now have private pbuf zones: md(4), fusefs, NFS client, smbfs, VFS cluster, FFS, swap, vnode pager. The following subsystems use the shared pbuf zone: cam(4), nvme(4), physio(9), aio(4). They should have their own private limits, but changing that is out of scope of this commit. o Fetch the tunable value of kern.nswbuf from init_param2() and, while here, move NSWBUF_MIN to opt_param.h and eliminate opt_swap.h, which was holding only this option. Default values aren't touched by this commit, but they probably should be reviewed with respect to modern hardware. This change removes a tight bottleneck from the sendfile(2) operation, which uses pbufs in the vnode pager. Other pagers would also benefit from faster allocation. Together with: gallatin Tested by: pho
This commit is contained in:
parent
7c895edb66
commit
756a541279
@ -936,7 +936,7 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
|
||||
/*
|
||||
* Get the buffer.
|
||||
*/
|
||||
mapinfo->bp[i] = getpbuf(NULL);
|
||||
mapinfo->bp[i] = uma_zalloc(pbuf_zone, M_WAITOK);
|
||||
|
||||
/* put our pointer in the data slot */
|
||||
mapinfo->bp[i]->b_data = *data_ptrs[i];
|
||||
@ -962,9 +962,9 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
|
||||
for (j = 0; j < i; ++j) {
|
||||
*data_ptrs[j] = mapinfo->bp[j]->b_caller1;
|
||||
vunmapbuf(mapinfo->bp[j]);
|
||||
relpbuf(mapinfo->bp[j], NULL);
|
||||
uma_zfree(pbuf_zone, mapinfo->bp[j]);
|
||||
}
|
||||
relpbuf(mapinfo->bp[i], NULL);
|
||||
uma_zfree(pbuf_zone, mapinfo->bp[i]);
|
||||
PRELE(curproc);
|
||||
return(EACCES);
|
||||
}
|
||||
@ -1052,7 +1052,7 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
|
||||
vunmapbuf(mapinfo->bp[i]);
|
||||
|
||||
/* release the buffer */
|
||||
relpbuf(mapinfo->bp[i], NULL);
|
||||
uma_zfree(pbuf_zone, mapinfo->bp[i]);
|
||||
}
|
||||
|
||||
/* allow ourselves to be swapped once again */
|
||||
|
@ -187,7 +187,7 @@ NO_ADAPTIVE_SX
|
||||
NO_EVENTTIMERS opt_timer.h
|
||||
NO_OBSOLETE_CODE opt_global.h
|
||||
NO_SYSCTL_DESCR opt_global.h
|
||||
NSWBUF_MIN opt_swap.h
|
||||
NSWBUF_MIN opt_param.h
|
||||
MBUF_PACKET_ZONE_DISABLE opt_global.h
|
||||
PANIC_REBOOT_WAIT_TIME opt_panic.h
|
||||
PCI_HP opt_pci.h
|
||||
|
@ -231,7 +231,7 @@ static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
|
||||
#define NMASK (NINDIR-1)
|
||||
static int nshift;
|
||||
|
||||
static int md_vnode_pbuf_freecnt;
|
||||
static uma_zone_t md_pbuf_zone;
|
||||
|
||||
struct indir {
|
||||
uintptr_t *array;
|
||||
@ -962,7 +962,7 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
|
||||
auio.uio_iovcnt = piov - auio.uio_iov;
|
||||
piov = auio.uio_iov;
|
||||
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
||||
pb = getpbuf(&md_vnode_pbuf_freecnt);
|
||||
pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
|
||||
bp->bio_resid = len;
|
||||
unmapped_step:
|
||||
npages = atop(min(MAXPHYS, round_page(len + (ma_offs &
|
||||
@ -1013,7 +1013,7 @@ unmapped_step:
|
||||
if (len > 0)
|
||||
goto unmapped_step;
|
||||
}
|
||||
relpbuf(pb, &md_vnode_pbuf_freecnt);
|
||||
uma_zfree(md_pbuf_zone, pb);
|
||||
}
|
||||
|
||||
free(piov, M_MD);
|
||||
@ -2118,7 +2118,7 @@ g_md_init(struct g_class *mp __unused)
|
||||
sx_xunlock(&md_sx);
|
||||
}
|
||||
}
|
||||
md_vnode_pbuf_freecnt = nswbuf / 10;
|
||||
md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10);
|
||||
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
|
||||
0600, MDCTL_NAME);
|
||||
g_topology_lock();
|
||||
@ -2214,5 +2214,6 @@ g_md_fini(struct g_class *mp __unused)
|
||||
sx_destroy(&md_sx);
|
||||
if (status_dev != NULL)
|
||||
destroy_dev(status_dev);
|
||||
uma_zdestroy(md_pbuf_zone);
|
||||
delete_unrhdr(md_uh);
|
||||
}
|
||||
|
@ -1052,7 +1052,7 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
|
||||
* this passthrough command.
|
||||
*/
|
||||
PHOLD(curproc);
|
||||
buf = getpbuf(NULL);
|
||||
buf = uma_zalloc(pbuf_zone, M_WAITOK);
|
||||
buf->b_data = pt->buf;
|
||||
buf->b_bufsize = pt->len;
|
||||
buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
|
||||
@ -1101,7 +1101,7 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
|
||||
|
||||
err:
|
||||
if (buf != NULL) {
|
||||
relpbuf(buf, NULL);
|
||||
uma_zfree(pbuf_zone, buf);
|
||||
PRELE(curproc);
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,7 @@ struct mtx fuse_mtx;
|
||||
extern struct vfsops fuse_vfsops;
|
||||
extern struct cdevsw fuse_cdevsw;
|
||||
extern struct vop_vector fuse_vnops;
|
||||
extern int fuse_pbuf_freecnt;
|
||||
extern uma_zone_t fuse_pbuf_zone;
|
||||
|
||||
static struct vfsconf fuse_vfsconf = {
|
||||
.vfc_version = VFS_VERSION,
|
||||
@ -122,7 +122,6 @@ fuse_loader(struct module *m, int what, void *arg)
|
||||
|
||||
switch (what) {
|
||||
case MOD_LOAD: /* kldload */
|
||||
fuse_pbuf_freecnt = nswbuf / 2 + 1;
|
||||
mtx_init(&fuse_mtx, "fuse_mtx", NULL, MTX_DEF);
|
||||
err = fuse_device_init();
|
||||
if (err) {
|
||||
@ -130,6 +129,7 @@ fuse_loader(struct module *m, int what, void *arg)
|
||||
return (err);
|
||||
}
|
||||
fuse_ipc_init();
|
||||
fuse_pbuf_zone = pbuf_zsecond_create("fusepbuf", nswbuf / 2);
|
||||
|
||||
/* vfs_modevent ignores its first arg */
|
||||
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
|
||||
@ -144,6 +144,7 @@ fuse_loader(struct module *m, int what, void *arg)
|
||||
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
|
||||
return (err);
|
||||
fuse_bringdown(eh_tag);
|
||||
uma_zdestroy(fuse_pbuf_zone);
|
||||
break;
|
||||
default:
|
||||
return (EINVAL);
|
||||
|
@ -201,7 +201,7 @@ static int fuse_reclaim_revoked = 0;
|
||||
SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
|
||||
&fuse_reclaim_revoked, 0, "");
|
||||
|
||||
int fuse_pbuf_freecnt = -1;
|
||||
uma_zone_t fuse_pbuf_zone;
|
||||
|
||||
#define fuse_vm_page_lock(m) vm_page_lock((m));
|
||||
#define fuse_vm_page_unlock(m) vm_page_unlock((m));
|
||||
@ -1824,7 +1824,7 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
|
||||
* We use only the kva address for the buffer, but this is extremely
|
||||
* convenient and fast.
|
||||
*/
|
||||
bp = getpbuf(&fuse_pbuf_freecnt);
|
||||
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
|
||||
|
||||
kva = (vm_offset_t)bp->b_data;
|
||||
pmap_qenter(kva, pages, npages);
|
||||
@ -1845,7 +1845,7 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
|
||||
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
|
||||
pmap_qremove(kva, npages);
|
||||
|
||||
relpbuf(bp, &fuse_pbuf_freecnt);
|
||||
uma_zfree(fuse_pbuf_zone, bp);
|
||||
|
||||
if (error && (uio.uio_resid == count)) {
|
||||
FS_DEBUG("error %d\n", error);
|
||||
@ -1958,7 +1958,7 @@ fuse_vnop_putpages(struct vop_putpages_args *ap)
|
||||
* We use only the kva address for the buffer, but this is extremely
|
||||
* convenient and fast.
|
||||
*/
|
||||
bp = getpbuf(&fuse_pbuf_freecnt);
|
||||
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
|
||||
|
||||
kva = (vm_offset_t)bp->b_data;
|
||||
pmap_qenter(kva, pages, npages);
|
||||
@ -1978,7 +1978,7 @@ fuse_vnop_putpages(struct vop_putpages_args *ap)
|
||||
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
|
||||
|
||||
pmap_qremove(kva, npages);
|
||||
relpbuf(bp, &fuse_pbuf_freecnt);
|
||||
uma_zfree(fuse_pbuf_zone, bp);
|
||||
|
||||
if (!error) {
|
||||
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
|
||||
|
@ -70,7 +70,7 @@ extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
|
||||
extern int newnfs_directio_enable;
|
||||
extern int nfs_keep_dirty_on_error;
|
||||
|
||||
int ncl_pbuf_freecnt = -1; /* start out unlimited */
|
||||
uma_zone_t ncl_pbuf_zone;
|
||||
|
||||
static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size,
|
||||
struct thread *td);
|
||||
@ -182,7 +182,7 @@ ncl_getpages(struct vop_getpages_args *ap)
|
||||
* We use only the kva address for the buffer, but this is extremely
|
||||
* convenient and fast.
|
||||
*/
|
||||
bp = getpbuf(&ncl_pbuf_freecnt);
|
||||
bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
|
||||
|
||||
kva = (vm_offset_t) bp->b_data;
|
||||
pmap_qenter(kva, pages, npages);
|
||||
@ -203,7 +203,7 @@ ncl_getpages(struct vop_getpages_args *ap)
|
||||
error = ncl_readrpc(vp, &uio, cred);
|
||||
pmap_qremove(kva, npages);
|
||||
|
||||
relpbuf(bp, &ncl_pbuf_freecnt);
|
||||
uma_zfree(ncl_pbuf_zone, bp);
|
||||
|
||||
if (error && (uio.uio_resid == count)) {
|
||||
printf("ncl_getpages: error %d\n", error);
|
||||
@ -793,7 +793,7 @@ do_sync:
|
||||
while (uiop->uio_resid > 0) {
|
||||
size = MIN(uiop->uio_resid, wsize);
|
||||
size = MIN(uiop->uio_iov->iov_len, size);
|
||||
bp = getpbuf(&ncl_pbuf_freecnt);
|
||||
bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
|
||||
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
|
||||
t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
|
||||
t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
|
||||
@ -836,7 +836,7 @@ err_free:
|
||||
free(t_iov, M_NFSDIRECTIO);
|
||||
free(t_uio, M_NFSDIRECTIO);
|
||||
bp->b_vp = NULL;
|
||||
relpbuf(bp, &ncl_pbuf_freecnt);
|
||||
uma_zfree(ncl_pbuf_zone, bp);
|
||||
if (error == EINTR)
|
||||
return (error);
|
||||
goto do_sync;
|
||||
@ -1571,7 +1571,7 @@ ncl_doio_directwrite(struct buf *bp)
|
||||
mtx_unlock(&np->n_mtx);
|
||||
}
|
||||
bp->b_vp = NULL;
|
||||
relpbuf(bp, &ncl_pbuf_freecnt);
|
||||
uma_zfree(ncl_pbuf_zone, bp);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -79,7 +79,7 @@ extern struct vop_vector newnfs_vnodeops;
|
||||
extern struct vop_vector newnfs_fifoops;
|
||||
extern uma_zone_t newnfsnode_zone;
|
||||
extern struct buf_ops buf_ops_newnfs;
|
||||
extern int ncl_pbuf_freecnt;
|
||||
extern uma_zone_t ncl_pbuf_zone;
|
||||
extern short nfsv4_cbport;
|
||||
extern int nfscl_enablecallb;
|
||||
extern int nfs_numnfscbd;
|
||||
@ -1023,7 +1023,7 @@ nfscl_init(void)
|
||||
return;
|
||||
inited = 1;
|
||||
nfscl_inited = 1;
|
||||
ncl_pbuf_freecnt = nswbuf / 2 + 1;
|
||||
ncl_pbuf_zone = pbuf_zsecond_create("nfspbuf", nswbuf / 2);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1357,6 +1357,7 @@ nfscl_modevent(module_t mod, int type, void *data)
|
||||
#if 0
|
||||
ncl_call_invalcaches = NULL;
|
||||
nfsd_call_nfscl = NULL;
|
||||
uma_zdestroy(ncl_pbuf_zone);
|
||||
/* and get rid of the mutexes */
|
||||
mtx_destroy(&ncl_iod_mutex);
|
||||
loaded = 0;
|
||||
|
@ -63,7 +63,7 @@
|
||||
|
||||
/*#define SMBFS_RWGENERIC*/
|
||||
|
||||
extern int smbfs_pbuf_freecnt;
|
||||
extern uma_zone_t smbfs_pbuf_zone;
|
||||
|
||||
static int smbfs_fastlookup = 1;
|
||||
|
||||
@ -468,7 +468,7 @@ smbfs_getpages(ap)
|
||||
scred = smbfs_malloc_scred();
|
||||
smb_makescred(scred, td, cred);
|
||||
|
||||
bp = getpbuf(&smbfs_pbuf_freecnt);
|
||||
bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
|
||||
|
||||
kva = (vm_offset_t) bp->b_data;
|
||||
pmap_qenter(kva, pages, npages);
|
||||
@ -490,7 +490,7 @@ smbfs_getpages(ap)
|
||||
smbfs_free_scred(scred);
|
||||
pmap_qremove(kva, npages);
|
||||
|
||||
relpbuf(bp, &smbfs_pbuf_freecnt);
|
||||
uma_zfree(smbfs_pbuf_zone, bp);
|
||||
|
||||
if (error && (uio.uio_resid == count)) {
|
||||
printf("smbfs_getpages: error %d\n",error);
|
||||
@ -593,7 +593,7 @@ smbfs_putpages(ap)
|
||||
rtvals[i] = VM_PAGER_ERROR;
|
||||
}
|
||||
|
||||
bp = getpbuf(&smbfs_pbuf_freecnt);
|
||||
bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
|
||||
|
||||
kva = (vm_offset_t) bp->b_data;
|
||||
pmap_qenter(kva, pages, npages);
|
||||
@ -621,7 +621,7 @@ smbfs_putpages(ap)
|
||||
|
||||
pmap_qremove(kva, npages);
|
||||
|
||||
relpbuf(bp, &smbfs_pbuf_freecnt);
|
||||
uma_zfree(smbfs_pbuf_zone, bp);
|
||||
|
||||
if (error == 0) {
|
||||
vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
|
||||
|
@ -88,7 +88,7 @@ MODULE_DEPEND(smbfs, netsmb, NSMB_VERSION, NSMB_VERSION, NSMB_VERSION);
|
||||
MODULE_DEPEND(smbfs, libiconv, 1, 1, 2);
|
||||
MODULE_DEPEND(smbfs, libmchain, 1, 1, 1);
|
||||
|
||||
int smbfs_pbuf_freecnt = -1; /* start out unlimited */
|
||||
uma_zone_t smbfs_pbuf_zone;
|
||||
|
||||
static int
|
||||
smbfs_cmount(struct mntarg *ma, void * data, uint64_t flags)
|
||||
@ -367,7 +367,8 @@ smbfs_quotactl(mp, cmd, uid, arg)
|
||||
int
|
||||
smbfs_init(struct vfsconf *vfsp)
|
||||
{
|
||||
smbfs_pbuf_freecnt = nswbuf / 2 + 1;
|
||||
|
||||
smbfs_pbuf_zone = pbuf_zsecond_create("smbpbuf", nswbuf / 2);
|
||||
SMBVDEBUG("done.\n");
|
||||
return 0;
|
||||
}
|
||||
@ -377,6 +378,7 @@ int
|
||||
smbfs_uninit(struct vfsconf *vfsp)
|
||||
{
|
||||
|
||||
uma_zdestroy(smbfs_pbuf_zone);
|
||||
SMBVDEBUG("done.\n");
|
||||
return 0;
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ physio(struct cdev *dev, struct uio *uio, int ioflag)
|
||||
maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
|
||||
pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
|
||||
} else {
|
||||
pbuf = getpbuf(NULL);
|
||||
pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
|
||||
sa = pbuf->b_data;
|
||||
maxpages = btoc(MAXPHYS);
|
||||
pages = pbuf->b_pages;
|
||||
@ -220,7 +220,7 @@ physio(struct cdev *dev, struct uio *uio, int ioflag)
|
||||
}
|
||||
doerror:
|
||||
if (pbuf)
|
||||
relpbuf(pbuf, NULL);
|
||||
uma_zfree(pbuf_zone, pbuf);
|
||||
else if (pages)
|
||||
free(pages, M_DEVBUF);
|
||||
g_destroy_bio(bp);
|
||||
|
@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/msgbuf.h>
|
||||
@ -286,6 +287,15 @@ init_param2(long physpages)
|
||||
TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
|
||||
TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
|
||||
|
||||
/*
|
||||
* Physical buffers are pre-allocated buffers (struct buf) that
|
||||
* are used as temporary holders for I/O, such as paging I/O.
|
||||
*/
|
||||
nswbuf = min(nbuf / 4, 256);
|
||||
TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
|
||||
if (nswbuf < NSWBUF_MIN)
|
||||
nswbuf = NSWBUF_MIN;
|
||||
|
||||
/*
|
||||
* The default for maxpipekva is min(1/64 of the kernel address space,
|
||||
* max(1/64 of main memory, 512KB)). See sys_pipe.c for more details.
|
||||
|
@ -1267,7 +1267,7 @@ aio_qbio(struct proc *p, struct kaiocb *job)
|
||||
goto unref;
|
||||
}
|
||||
|
||||
job->pbuf = pbuf = (struct buf *)getpbuf(NULL);
|
||||
job->pbuf = pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
|
||||
BUF_KERNPROC(pbuf);
|
||||
AIO_LOCK(ki);
|
||||
ki->kaio_buffer_count++;
|
||||
@ -1318,7 +1318,7 @@ doerror:
|
||||
AIO_LOCK(ki);
|
||||
ki->kaio_buffer_count--;
|
||||
AIO_UNLOCK(ki);
|
||||
relpbuf(pbuf, NULL);
|
||||
uma_zfree(pbuf_zone, pbuf);
|
||||
job->pbuf = NULL;
|
||||
}
|
||||
g_destroy_bio(bp);
|
||||
@ -2344,7 +2344,7 @@ aio_biowakeup(struct bio *bp)
|
||||
ki = userp->p_aioinfo;
|
||||
if (job->pbuf) {
|
||||
pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
|
||||
relpbuf(job->pbuf, NULL);
|
||||
uma_zfree(pbuf_zone, job->pbuf);
|
||||
job->pbuf = NULL;
|
||||
atomic_subtract_int(&num_buf_aio, 1);
|
||||
AIO_LOCK(ki);
|
||||
|
@ -86,7 +86,6 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/swap_pager.h>
|
||||
#include "opt_swap.h"
|
||||
|
||||
static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer");
|
||||
|
||||
@ -1017,10 +1016,6 @@ bd_speedup(void)
|
||||
mtx_unlock(&bdlock);
|
||||
}
|
||||
|
||||
#ifndef NSWBUF_MIN
|
||||
#define NSWBUF_MIN 16
|
||||
#endif
|
||||
|
||||
#ifdef __i386__
|
||||
#define TRANSIENT_DENOM 5
|
||||
#else
|
||||
@ -1129,20 +1124,9 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
|
||||
nbuf = buf_sz / BKVASIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* swbufs are used as temporary holders for I/O, such as paging I/O.
|
||||
* We have no less then 16 and no more then 256.
|
||||
*/
|
||||
nswbuf = min(nbuf / 4, 256);
|
||||
TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
|
||||
if (nswbuf < NSWBUF_MIN)
|
||||
nswbuf = NSWBUF_MIN;
|
||||
|
||||
/*
|
||||
* Reserve space for the buffer cache buffers
|
||||
*/
|
||||
swbuf = (void *)v;
|
||||
v = (caddr_t)(swbuf + nswbuf);
|
||||
buf = (void *)v;
|
||||
v = (caddr_t)(buf + nbuf);
|
||||
|
||||
|
@ -63,7 +63,9 @@ SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0,
|
||||
#endif
|
||||
|
||||
static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer");
|
||||
static uma_zone_t cluster_pbuf_zone;
|
||||
|
||||
static void cluster_init(void *);
|
||||
static struct cluster_save *cluster_collectbufs(struct vnode *vp,
|
||||
struct buf *last_bp, int gbflags);
|
||||
static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize,
|
||||
@ -83,6 +85,15 @@ static int read_min = 1;
|
||||
SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0,
|
||||
"Cluster read min block count");
|
||||
|
||||
SYSINIT(cluster, SI_SUB_CPU, SI_ORDER_ANY, cluster_init, NULL);
|
||||
|
||||
/*
 * Create the private pbuf zone used by the cluster code.
 *
 * The zone is named "clpbuf" and is created through pbuf_zsecond_create()
 * with a requested limit of nswbuf / 2 items.  The resulting zone handle is
 * stored in cluster_pbuf_zone.  The 'dummy' argument is not used.
 */
static void
|
||||
cluster_init(void *dummy)
|
||||
{
|
||||
|
||||
cluster_pbuf_zone = pbuf_zsecond_create("clpbuf", nswbuf / 2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read data to a buf, including read-ahead if we find this to be beneficial.
|
||||
* cluster_read replaces bread.
|
||||
@ -372,7 +383,7 @@ cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
|
||||
((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
|
||||
return tbp;
|
||||
|
||||
bp = trypbuf(&cluster_pbuf_freecnt);
|
||||
bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT);
|
||||
if (bp == NULL)
|
||||
return tbp;
|
||||
|
||||
@ -603,7 +614,7 @@ cluster_callback(struct buf *bp)
|
||||
bufdone(tbp);
|
||||
}
|
||||
pbrelvp(bp);
|
||||
relpbuf(bp, &cluster_pbuf_freecnt);
|
||||
uma_zfree(cluster_pbuf_zone, bp);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -856,9 +867,8 @@ cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len,
|
||||
(tbp->b_bcount != tbp->b_bufsize) ||
|
||||
(tbp->b_bcount != size) ||
|
||||
(len == 1) ||
|
||||
((bp = (vp->v_vflag & VV_MD) != 0 ?
|
||||
trypbuf(&cluster_pbuf_freecnt) :
|
||||
getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
|
||||
((bp = uma_zalloc(cluster_pbuf_zone,
|
||||
(vp->v_vflag & VV_MD) != 0 ? M_NOWAIT : M_WAITOK)) == NULL)) {
|
||||
totalwritten += tbp->b_bufsize;
|
||||
bawrite(tbp);
|
||||
++start_lbn;
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include <sys/queue.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/lockmgr.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
struct bio;
|
||||
struct buf;
|
||||
@ -275,6 +276,11 @@ struct buf {
|
||||
#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
#ifndef NSWBUF_MIN
|
||||
#define NSWBUF_MIN 16
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Buffer locking
|
||||
*/
|
||||
@ -287,7 +293,7 @@ extern const char *buf_wmesg; /* Default buffer lock message */
|
||||
* Initialize a lock.
|
||||
*/
|
||||
#define BUF_LOCKINIT(bp) \
|
||||
lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
|
||||
lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_NEW)
|
||||
/*
|
||||
*
|
||||
* Get a lock sleeping non-interruptably until it becomes available.
|
||||
@ -493,10 +499,6 @@ extern int bdwriteskip;
|
||||
extern int dirtybufferflushes;
|
||||
extern int altbufferflushes;
|
||||
extern int nswbuf; /* Number of swap I/O buffer headers. */
|
||||
extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
|
||||
extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
|
||||
extern int vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
|
||||
asynchronous reads */
|
||||
extern caddr_t unmapped_buf; /* Data address for unmapped buffers. */
|
||||
|
||||
static inline int
|
||||
@ -537,7 +539,6 @@ void brelse(struct buf *);
|
||||
void bqrelse(struct buf *);
|
||||
int vfs_bio_awrite(struct buf *);
|
||||
void vfs_drain_busy_pages(struct buf *bp);
|
||||
struct buf * getpbuf(int *);
|
||||
struct buf *incore(struct bufobj *, daddr_t);
|
||||
struct buf *gbincore(struct bufobj *, daddr_t);
|
||||
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
|
||||
@ -549,6 +550,9 @@ int bufwrite(struct buf *);
|
||||
void bufdone(struct buf *);
|
||||
void bd_speedup(void);
|
||||
|
||||
extern uma_zone_t pbuf_zone;
|
||||
uma_zone_t pbuf_zsecond_create(char *name, int max);
|
||||
|
||||
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
|
||||
struct ucred *, long, int, int, struct buf **);
|
||||
int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
|
||||
@ -562,7 +566,6 @@ void vfs_busy_pages(struct buf *, int clear_modify);
|
||||
void vfs_unbusy_pages(struct buf *);
|
||||
int vmapbuf(struct buf *, int);
|
||||
void vunmapbuf(struct buf *);
|
||||
void relpbuf(struct buf *, int *);
|
||||
void brelvp(struct buf *);
|
||||
void bgetvp(struct vnode *, struct buf *);
|
||||
void pbgetbo(struct bufobj *bo, struct buf *bp);
|
||||
@ -571,7 +574,6 @@ void pbrelbo(struct buf *);
|
||||
void pbrelvp(struct buf *);
|
||||
int allocbuf(struct buf *bp, int size);
|
||||
void reassignbuf(struct buf *);
|
||||
struct buf *trypbuf(int *);
|
||||
void bwait(struct buf *, u_char, const char *);
|
||||
void bdone(struct buf *);
|
||||
|
||||
|
@ -74,9 +74,7 @@ int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
|
||||
|
||||
SYSCTL_DECL(_vfs_ffs);
|
||||
|
||||
static int ffsrawbufcnt = 4;
|
||||
SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
|
||||
"Buffers available for raw reads");
|
||||
static uma_zone_t ffsraw_pbuf_zone;
|
||||
|
||||
static int allowrawread = 1;
|
||||
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
|
||||
@ -90,7 +88,8 @@ static void
|
||||
ffs_rawread_setup(void *arg __unused)
|
||||
{
|
||||
|
||||
ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
|
||||
ffsraw_pbuf_zone = pbuf_zsecond_create("ffsrawpbuf",
|
||||
(nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8);
|
||||
}
|
||||
SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
|
||||
|
||||
@ -296,8 +295,7 @@ ffs_rawread_main(struct vnode *vp,
|
||||
while (resid > 0) {
|
||||
|
||||
if (bp == NULL) { /* Setup first read */
|
||||
/* XXX: Leave some bufs for swap */
|
||||
bp = getpbuf(&ffsrawbufcnt);
|
||||
bp = uma_zalloc(ffsraw_pbuf_zone, M_WAITOK);
|
||||
pbgetvp(vp, bp);
|
||||
error = ffs_rawread_readahead(vp, udata, offset,
|
||||
resid, td, bp);
|
||||
@ -305,9 +303,9 @@ ffs_rawread_main(struct vnode *vp,
|
||||
break;
|
||||
|
||||
if (resid > bp->b_bufsize) { /* Setup fist readahead */
|
||||
/* XXX: Leave bufs for swap */
|
||||
if (rawreadahead != 0)
|
||||
nbp = trypbuf(&ffsrawbufcnt);
|
||||
nbp = uma_zalloc(ffsraw_pbuf_zone,
|
||||
M_NOWAIT);
|
||||
else
|
||||
nbp = NULL;
|
||||
if (nbp != NULL) {
|
||||
@ -324,7 +322,8 @@ ffs_rawread_main(struct vnode *vp,
|
||||
nbp);
|
||||
if (nerror) {
|
||||
pbrelvp(nbp);
|
||||
relpbuf(nbp, &ffsrawbufcnt);
|
||||
uma_zfree(ffsraw_pbuf_zone,
|
||||
nbp);
|
||||
nbp = NULL;
|
||||
}
|
||||
}
|
||||
@ -365,7 +364,7 @@ ffs_rawread_main(struct vnode *vp,
|
||||
|
||||
if (resid <= bp->b_bufsize) { /* No more readaheads */
|
||||
pbrelvp(nbp);
|
||||
relpbuf(nbp, &ffsrawbufcnt);
|
||||
uma_zfree(ffsraw_pbuf_zone, nbp);
|
||||
nbp = NULL;
|
||||
} else { /* Setup next readahead */
|
||||
nerror = ffs_rawread_readahead(vp,
|
||||
@ -379,7 +378,7 @@ ffs_rawread_main(struct vnode *vp,
|
||||
nbp);
|
||||
if (nerror != 0) {
|
||||
pbrelvp(nbp);
|
||||
relpbuf(nbp, &ffsrawbufcnt);
|
||||
uma_zfree(ffsraw_pbuf_zone, nbp);
|
||||
nbp = NULL;
|
||||
}
|
||||
}
|
||||
@ -395,13 +394,13 @@ ffs_rawread_main(struct vnode *vp,
|
||||
|
||||
if (bp != NULL) {
|
||||
pbrelvp(bp);
|
||||
relpbuf(bp, &ffsrawbufcnt);
|
||||
uma_zfree(ffsraw_pbuf_zone, bp);
|
||||
}
|
||||
if (nbp != NULL) { /* Run down readahead buffer */
|
||||
bwait(nbp, PRIBIO, "rawrd");
|
||||
vunmapbuf(nbp);
|
||||
pbrelvp(nbp);
|
||||
relpbuf(nbp, &ffsrawbufcnt);
|
||||
uma_zfree(ffsraw_pbuf_zone, nbp);
|
||||
}
|
||||
|
||||
if (error == 0)
|
||||
|
@ -71,7 +71,6 @@
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "opt_swap.h"
|
||||
#include "opt_vm.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
@ -324,9 +323,8 @@ swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
|
||||
|
||||
static int swap_pager_full = 2; /* swap space exhaustion (task killing) */
|
||||
static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/
|
||||
static int nsw_rcount; /* free read buffers */
|
||||
static int nsw_wcount_sync; /* limit write buffers / synchronous */
|
||||
static int nsw_wcount_async; /* limit write buffers / asynchronous */
|
||||
static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */
|
||||
static int nsw_wcount_async; /* limit async write buffers */
|
||||
static int nsw_wcount_async_max;/* assigned maximum */
|
||||
static int nsw_cluster_max; /* maximum VOP I/O allowed */
|
||||
|
||||
@ -352,6 +350,8 @@ static struct sx sw_alloc_sx;
|
||||
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
|
||||
|
||||
static struct pagerlst swap_pager_object_list[NOBJLISTS];
|
||||
static uma_zone_t swwbuf_zone;
|
||||
static uma_zone_t swrbuf_zone;
|
||||
static uma_zone_t swblk_zone;
|
||||
static uma_zone_t swpctrie_zone;
|
||||
|
||||
@ -539,12 +539,12 @@ swap_pager_swap_init(void)
|
||||
*/
|
||||
nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
|
||||
|
||||
mtx_lock(&pbuf_mtx);
|
||||
nsw_rcount = (nswbuf + 1) / 2;
|
||||
nsw_wcount_sync = (nswbuf + 3) / 4;
|
||||
nsw_wcount_async = 4;
|
||||
nsw_wcount_async_max = nsw_wcount_async;
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
mtx_init(&swbuf_mtx, "async swbuf mutex", NULL, MTX_DEF);
|
||||
|
||||
swwbuf_zone = pbuf_zsecond_create("swwbuf", nswbuf / 4);
|
||||
swrbuf_zone = pbuf_zsecond_create("swrbuf", nswbuf / 2);
|
||||
|
||||
/*
|
||||
* Initialize our zone, taking the user's requested size or
|
||||
@ -1205,7 +1205,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind,
|
||||
("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
|
||||
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
bp = getpbuf(&nsw_rcount);
|
||||
bp = uma_zalloc(swrbuf_zone, M_WAITOK);
|
||||
/* Pages cannot leave the object while busy. */
|
||||
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
|
||||
MPASS(p->pindex == bm->pindex + i);
|
||||
@ -1406,12 +1406,17 @@ swap_pager_putpages(vm_object_t object, vm_page_t *ma, int count,
|
||||
* All I/O parameters have been satisfied, build the I/O
|
||||
* request and assign the swap space.
|
||||
*/
|
||||
if (sync == TRUE) {
|
||||
bp = getpbuf(&nsw_wcount_sync);
|
||||
} else {
|
||||
bp = getpbuf(&nsw_wcount_async);
|
||||
bp->b_flags = B_ASYNC;
|
||||
if (sync != TRUE) {
|
||||
mtx_lock(&swbuf_mtx);
|
||||
while (nsw_wcount_async == 0)
|
||||
msleep(&nsw_wcount_async, &swbuf_mtx, PVM,
|
||||
"swbufa", 0);
|
||||
nsw_wcount_async--;
|
||||
mtx_unlock(&swbuf_mtx);
|
||||
}
|
||||
bp = uma_zalloc(swwbuf_zone, M_WAITOK);
|
||||
if (sync != TRUE)
|
||||
bp->b_flags = B_ASYNC;
|
||||
bp->b_flags |= B_PAGING;
|
||||
bp->b_iocmd = BIO_WRITE;
|
||||
|
||||
@ -1634,15 +1639,13 @@ swp_pager_async_iodone(struct buf *bp)
|
||||
/*
|
||||
* release the physical I/O buffer
|
||||
*/
|
||||
relpbuf(
|
||||
bp,
|
||||
((bp->b_iocmd == BIO_READ) ? &nsw_rcount :
|
||||
((bp->b_flags & B_ASYNC) ?
|
||||
&nsw_wcount_async :
|
||||
&nsw_wcount_sync
|
||||
)
|
||||
)
|
||||
);
|
||||
if (bp->b_flags & B_ASYNC) {
|
||||
mtx_lock(&swbuf_mtx);
|
||||
if (++nsw_wcount_async == 1)
|
||||
wakeup(&nsw_wcount_async);
|
||||
mtx_unlock(&swbuf_mtx);
|
||||
}
|
||||
uma_zfree((bp->b_iocmd == BIO_READ) ? swrbuf_zone : swwbuf_zone, bp);
|
||||
}
|
||||
|
||||
int
|
||||
@ -2627,6 +2630,7 @@ swapgeom_done(struct bio *bp2)
|
||||
bp->b_ioflags |= BIO_ERROR;
|
||||
bp->b_resid = bp->b_bcount - bp2->bio_completed;
|
||||
bp->b_error = bp2->bio_error;
|
||||
bp->b_caller1 = NULL;
|
||||
bufdone(bp);
|
||||
sp = bp2->bio_caller1;
|
||||
mtx_lock(&sw_dev_mtx);
|
||||
@ -2666,6 +2670,7 @@ swapgeom_strategy(struct buf *bp, struct swdevt *sp)
|
||||
return;
|
||||
}
|
||||
|
||||
bp->b_caller1 = bio;
|
||||
bio->bio_caller1 = sp;
|
||||
bio->bio_caller2 = bp;
|
||||
bio->bio_cmd = bp->b_iocmd;
|
||||
@ -2880,7 +2885,7 @@ sysctl_swap_async_max(SYSCTL_HANDLER_ARGS)
|
||||
if (new > nswbuf / 2 || new < 1)
|
||||
return (EINVAL);
|
||||
|
||||
mtx_lock(&pbuf_mtx);
|
||||
mtx_lock(&swbuf_mtx);
|
||||
while (nsw_wcount_async_max != new) {
|
||||
/*
|
||||
* Adjust difference. If the current async count is too low,
|
||||
@ -2895,11 +2900,11 @@ sysctl_swap_async_max(SYSCTL_HANDLER_ARGS)
|
||||
} else {
|
||||
nsw_wcount_async_max -= nsw_wcount_async;
|
||||
nsw_wcount_async = 0;
|
||||
msleep(&nsw_wcount_async, &pbuf_mtx, PSWP,
|
||||
msleep(&nsw_wcount_async, &swbuf_mtx, PSWP,
|
||||
"swpsysctl", 0);
|
||||
}
|
||||
}
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
mtx_unlock(&swbuf_mtx);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -68,6 +68,8 @@
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include "opt_param.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
@ -85,10 +87,12 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pager.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
int cluster_pbuf_freecnt = -1; /* unlimited to begin with */
|
||||
|
||||
struct buf *swbuf;
|
||||
uma_zone_t pbuf_zone;
|
||||
static int pbuf_init(void *, int, int);
|
||||
static int pbuf_ctor(void *, int, void *, int);
|
||||
static void pbuf_dtor(void *, int, void *);
|
||||
|
||||
static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *);
|
||||
static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
|
||||
@ -167,9 +171,6 @@ struct pagerops *pagertab[] = {
|
||||
* cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
|
||||
* (MAXPHYS == 64k) if you want to get the most efficiency.
|
||||
*/
|
||||
struct mtx_padalign __exclusive_cache_line pbuf_mtx;
|
||||
static TAILQ_HEAD(swqueue, buf) bswlist;
|
||||
static int bswneeded;
|
||||
vm_offset_t swapbkva; /* swap buffers kva */
|
||||
|
||||
void
|
||||
@ -177,7 +178,6 @@ vm_pager_init(void)
|
||||
{
|
||||
struct pagerops **pgops;
|
||||
|
||||
TAILQ_INIT(&bswlist);
|
||||
/*
|
||||
* Initialize known pagers
|
||||
*/
|
||||
@ -186,30 +186,51 @@ vm_pager_init(void)
|
||||
(*(*pgops)->pgo_init)();
|
||||
}
|
||||
|
||||
static int nswbuf_max;
|
||||
|
||||
void
|
||||
vm_pager_bufferinit(void)
|
||||
{
|
||||
struct buf *bp;
|
||||
int i;
|
||||
|
||||
mtx_init(&pbuf_mtx, "pbuf mutex", NULL, MTX_DEF);
|
||||
bp = swbuf;
|
||||
/*
|
||||
* Now set up swap and physical I/O buffer headers.
|
||||
*/
|
||||
for (i = 0; i < nswbuf; i++, bp++) {
|
||||
TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
|
||||
BUF_LOCKINIT(bp);
|
||||
LIST_INIT(&bp->b_dep);
|
||||
bp->b_rcred = bp->b_wcred = NOCRED;
|
||||
bp->b_xflags = 0;
|
||||
}
|
||||
|
||||
cluster_pbuf_freecnt = nswbuf / 2;
|
||||
vnode_pbuf_freecnt = nswbuf / 2 + 1;
|
||||
vnode_async_pbuf_freecnt = nswbuf / 2;
|
||||
/* Main zone for paging bufs. */
|
||||
pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
|
||||
pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
|
||||
UMA_ZONE_VM | UMA_ZONE_NOFREE);
|
||||
/* A few subsystems may still use this zone directly, so it needs a limit. */
|
||||
nswbuf_max += uma_zone_set_max(pbuf_zone, NSWBUF_MIN);
|
||||
}
|
||||
|
||||
/*
 * Create a secondary pbuf zone layered on top of pbuf_zone.
 *
 * name - name given to the new zone.
 * max  - item limit requested for the new zone via uma_zone_set_max().
 *
 * The new zone uses pbuf_ctor/pbuf_dtor as its constructor/destructor.
 * While nswbuf_max is positive, the value returned by uma_zone_set_max()
 * is only added to nswbuf_max; otherwise that many items are preallocated
 * in pbuf_zone right away.
 * NOTE(review): this assumes uma_zone_set_max() returns the effective
 * limit that was applied -- confirm against uma(9).
 *
 * Returns the newly created zone.
 */
uma_zone_t
|
||||
pbuf_zsecond_create(char *name, int max)
|
||||
{
|
||||
uma_zone_t zone;
|
||||
|
||||
zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL,
|
||||
pbuf_zone);
|
||||
/*
|
||||
* uma_prealloc() rounds up to items per slab. If we preallocated
|
||||
* immediately on every pbuf_zsecond_create() call, we could accumulate
|
||||
* too large a difference between the hard limit and the preallocated
|
||||
* items, which means wasted memory.
|
||||
*/
|
||||
if (nswbuf_max > 0)
|
||||
nswbuf_max += uma_zone_set_max(zone, max);
|
||||
else
|
||||
uma_prealloc(pbuf_zone, uma_zone_set_max(zone, max));
|
||||
|
||||
return (zone);
|
||||
}
|
||||
|
||||
static void
|
||||
pbuf_prealloc(void *arg __unused)
|
||||
{
|
||||
|
||||
uma_prealloc(pbuf_zone, nswbuf_max);
|
||||
nswbuf_max = -1;
|
||||
}
|
||||
|
||||
SYSINIT(pbuf, SI_SUB_KTHREAD_BUF, SI_ORDER_ANY, pbuf_prealloc, NULL);
|
||||
|
||||
/*
|
||||
* Allocate an instance of a pager of the given type.
|
||||
* Size, protection and offset parameters are passed in for pagers that
|
||||
@ -347,110 +368,33 @@ vm_pager_object_lookup(struct pagerlst *pg_list, void *handle)
|
||||
return (object);
|
||||
}
|
||||
|
||||
/*
|
||||
* initialize a physical buffer
|
||||
*/
|
||||
|
||||
/*
|
||||
* XXX This probably belongs in vfs_bio.c
|
||||
*/
|
||||
static void
|
||||
initpbuf(struct buf *bp)
|
||||
static int
|
||||
pbuf_ctor(void *mem, int size, void *arg, int flags)
|
||||
{
|
||||
struct buf *bp = mem;
|
||||
|
||||
KASSERT(bp->b_bufobj == NULL, ("initpbuf with bufobj"));
|
||||
KASSERT(bp->b_vp == NULL, ("initpbuf with vp"));
|
||||
bp->b_vp = NULL;
|
||||
bp->b_bufobj = NULL;
|
||||
|
||||
/* copied from initpbuf() */
|
||||
bp->b_rcred = NOCRED;
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
|
||||
bp->b_kvabase = (caddr_t)(MAXPHYS * (bp - swbuf)) + swapbkva;
|
||||
bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
|
||||
bp->b_data = bp->b_kvabase;
|
||||
bp->b_kvasize = MAXPHYS;
|
||||
bp->b_flags = 0;
|
||||
bp->b_xflags = 0;
|
||||
bp->b_flags = 0;
|
||||
bp->b_ioflags = 0;
|
||||
bp->b_iodone = NULL;
|
||||
bp->b_error = 0;
|
||||
BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
|
||||
buf_track(bp, __func__);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate a physical buffer
|
||||
*
|
||||
* There are a limited number (nswbuf) of physical buffers. We need
|
||||
* to make sure that no single subsystem is able to hog all of them,
|
||||
* so each subsystem implements a counter which is typically initialized
|
||||
* to 1/2 nswbuf. getpbuf() decrements this counter in allocation and
|
||||
* increments it on release, and blocks if the counter hits zero. A
|
||||
* subsystem may initialize the counter to -1 to disable the feature,
|
||||
* but it must still be sure to match up all uses of getpbuf() with
|
||||
* relpbuf() using the same variable.
|
||||
*
|
||||
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed
|
||||
* relatively soon when the rest of the subsystems get smart about it. XXX
|
||||
*/
|
||||
struct buf *
|
||||
getpbuf(int *pfreecnt)
|
||||
{
|
||||
struct buf *bp;
|
||||
|
||||
mtx_lock(&pbuf_mtx);
|
||||
for (;;) {
|
||||
if (pfreecnt != NULL) {
|
||||
while (*pfreecnt == 0) {
|
||||
msleep(pfreecnt, &pbuf_mtx, PVM, "wswbuf0", 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* get a bp from the swap buffer header pool */
|
||||
if ((bp = TAILQ_FIRST(&bswlist)) != NULL)
|
||||
break;
|
||||
|
||||
bswneeded = 1;
|
||||
msleep(&bswneeded, &pbuf_mtx, PVM, "wswbuf1", 0);
|
||||
/* loop in case someone else grabbed one */
|
||||
}
|
||||
TAILQ_REMOVE(&bswlist, bp, b_freelist);
|
||||
if (pfreecnt)
|
||||
--*pfreecnt;
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
initpbuf(bp);
|
||||
return (bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* allocate a physical buffer, if one is available.
|
||||
*
|
||||
* Note that there is no NULL hack here - all subsystems using this
|
||||
* call understand how to use pfreecnt.
|
||||
*/
|
||||
struct buf *
|
||||
trypbuf(int *pfreecnt)
|
||||
{
|
||||
struct buf *bp;
|
||||
|
||||
mtx_lock(&pbuf_mtx);
|
||||
if (*pfreecnt == 0 || (bp = TAILQ_FIRST(&bswlist)) == NULL) {
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
return NULL;
|
||||
}
|
||||
TAILQ_REMOVE(&bswlist, bp, b_freelist);
|
||||
--*pfreecnt;
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
initpbuf(bp);
|
||||
return (bp);
|
||||
}
|
||||
|
||||
/*
|
||||
* release a physical buffer
|
||||
*
|
||||
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed
|
||||
* relatively soon when the rest of the subsystems get smart about it. XXX
|
||||
*/
|
||||
void
|
||||
relpbuf(struct buf *bp, int *pfreecnt)
|
||||
static void
|
||||
pbuf_dtor(void *mem, int size, void *arg)
|
||||
{
|
||||
struct buf *bp = mem;
|
||||
|
||||
if (bp->b_rcred != NOCRED) {
|
||||
crfree(bp->b_rcred);
|
||||
@ -461,24 +405,24 @@ relpbuf(struct buf *bp, int *pfreecnt)
|
||||
bp->b_wcred = NOCRED;
|
||||
}
|
||||
|
||||
KASSERT(bp->b_vp == NULL, ("relpbuf with vp"));
|
||||
KASSERT(bp->b_bufobj == NULL, ("relpbuf with bufobj"));
|
||||
|
||||
buf_track(bp, __func__);
|
||||
BUF_UNLOCK(bp);
|
||||
}
|
||||
|
||||
mtx_lock(&pbuf_mtx);
|
||||
TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
|
||||
static int
|
||||
pbuf_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct buf *bp = mem;
|
||||
|
||||
if (bswneeded) {
|
||||
bswneeded = 0;
|
||||
wakeup(&bswneeded);
|
||||
}
|
||||
if (pfreecnt) {
|
||||
if (++*pfreecnt == 1)
|
||||
wakeup(pfreecnt);
|
||||
}
|
||||
mtx_unlock(&pbuf_mtx);
|
||||
bp->b_kvabase = (void *)kva_alloc(MAXPHYS);
|
||||
if (bp->b_kvabase == NULL)
|
||||
return (ENOMEM);
|
||||
bp->b_kvasize = MAXPHYS;
|
||||
BUF_LOCKINIT(bp);
|
||||
LIST_INIT(&bp->b_dep);
|
||||
bp->b_rcred = bp->b_wcred = NOCRED;
|
||||
bp->b_xflags = 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_vm.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/proc.h>
|
||||
@ -82,6 +83,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vnode_pager.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/uma.h>
|
||||
|
||||
static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
|
||||
daddr_t *rtaddress, int *run);
|
||||
@ -107,15 +109,22 @@ struct pagerops vnodepagerops = {
|
||||
.pgo_haspage = vnode_pager_haspage,
|
||||
};
|
||||
|
||||
int vnode_pbuf_freecnt;
|
||||
int vnode_async_pbuf_freecnt;
|
||||
|
||||
static struct domainset *vnode_domainset = NULL;
|
||||
|
||||
SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW,
|
||||
&vnode_domainset, 0, sysctl_handle_domainset, "A",
|
||||
"Default vnode NUMA policy");
|
||||
|
||||
static uma_zone_t vnode_pbuf_zone;
|
||||
|
||||
static void
|
||||
vnode_pager_init(void *dummy)
|
||||
{
|
||||
|
||||
vnode_pbuf_zone = pbuf_zsecond_create("vnpbuf", nswbuf * 8);
|
||||
}
|
||||
SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL);
|
||||
|
||||
/* Create the VM system backing object for this vnode */
|
||||
int
|
||||
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
|
||||
@ -563,7 +572,7 @@ vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
|
||||
break;
|
||||
}
|
||||
if (fileaddr != -1) {
|
||||
bp = getpbuf(&vnode_pbuf_freecnt);
|
||||
bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
|
||||
|
||||
/* build a minimal buffer header */
|
||||
bp->b_iocmd = BIO_READ;
|
||||
@ -595,7 +604,7 @@ vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
|
||||
*/
|
||||
bp->b_vp = NULL;
|
||||
pbrelbo(bp);
|
||||
relpbuf(bp, &vnode_pbuf_freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
if (error)
|
||||
break;
|
||||
} else
|
||||
@ -757,7 +766,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
#ifdef INVARIANTS
|
||||
off_t blkno0;
|
||||
#endif
|
||||
int bsize, pagesperblock, *freecnt;
|
||||
int bsize, pagesperblock;
|
||||
int error, before, after, rbehind, rahead, poff, i;
|
||||
int bytecount, secmask;
|
||||
|
||||
@ -788,17 +797,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
return (VM_PAGER_OK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Synchronous and asynchronous paging operations use different
|
||||
* free pbuf counters. This is done to avoid asynchronous requests
|
||||
* to consume all pbufs.
|
||||
* Allocate the pbuf at the very beginning of the function, so that
|
||||
* if we are low on certain kind of pbufs don't even proceed to BMAP,
|
||||
* but sleep.
|
||||
*/
|
||||
freecnt = iodone != NULL ?
|
||||
&vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
|
||||
bp = getpbuf(freecnt);
|
||||
bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
|
||||
|
||||
/*
|
||||
* Get the underlying device blocks for the file with VOP_BMAP().
|
||||
@ -807,7 +806,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
*/
|
||||
error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
|
||||
if (error == EOPNOTSUPP) {
|
||||
relpbuf(bp, freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
VM_OBJECT_WLOCK(object);
|
||||
for (i = 0; i < count; i++) {
|
||||
VM_CNT_INC(v_vnodein);
|
||||
@ -819,7 +818,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
VM_OBJECT_WUNLOCK(object);
|
||||
return (error);
|
||||
} else if (error != 0) {
|
||||
relpbuf(bp, freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
return (VM_PAGER_ERROR);
|
||||
}
|
||||
|
||||
@ -828,7 +827,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
* than a page size, then use special small filesystem code.
|
||||
*/
|
||||
if (pagesperblock == 0) {
|
||||
relpbuf(bp, freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
for (i = 0; i < count; i++) {
|
||||
VM_CNT_INC(v_vnodein);
|
||||
VM_CNT_INC(v_vnodepgsin);
|
||||
@ -847,7 +846,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
KASSERT(count == 1,
|
||||
("%s: array[%d] request to a sparse file %p", __func__,
|
||||
count, vp));
|
||||
relpbuf(bp, freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
pmap_zero_page(m[0]);
|
||||
KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
|
||||
__func__, m[0]));
|
||||
@ -1061,7 +1060,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
|
||||
bp->b_pages[i] = NULL;
|
||||
bp->b_vp = NULL;
|
||||
pbrelbo(bp);
|
||||
relpbuf(bp, &vnode_pbuf_freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
|
||||
}
|
||||
}
|
||||
@ -1079,7 +1078,7 @@ vnode_pager_generic_getpages_done_async(struct buf *bp)
|
||||
bp->b_pages[i] = NULL;
|
||||
bp->b_vp = NULL;
|
||||
pbrelbo(bp);
|
||||
relpbuf(bp, &vnode_async_pbuf_freecnt);
|
||||
uma_zfree(vnode_pbuf_zone, bp);
|
||||
}
|
||||
|
||||
static int
|
||||
|
Loading…
x
Reference in New Issue
Block a user