Release laundered vnode pages to the head of the inactive queue.

The swap pager enqueues laundered pages near the head of the inactive queue
to avoid another trip through LRU before reclamation. This change adds
support for this behaviour to the vnode pager and makes use of it in UFS and
ext2fs. Some ioflag handling is consolidated into a common subroutine so
that this support can be easily extended to other filesystems which make use
of the buffer cache. No changes are needed for ZFS since its putpages
routine always undirties the pages before returning, and the laundry
thread requeues the pages appropriately in this case.

Reviewed by:	alc, kib
Differential Revision:	https://reviews.freebsd.org/D8589
markj 2016-11-23 17:53:07 +00:00
parent 7982b0b181
commit 4159d33f6b
8 changed files with 62 additions and 129 deletions
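For orientation, the flag plumbing this change introduces can be summarized in one place. The helper below is an illustrative sketch only: putpages_ioflags() is a name invented for this summary, and it simply restates the ioflag translation that vnode_pager_generic_putpages() performs in the final hunk of this commit.

#include <sys/param.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>

/*
 * Illustrative sketch: map pager "put" flags to vnode I/O flags.  The
 * laundry thread passes VM_PAGER_PUT_NOREUSE when flushing inactive
 * pages; the vnode pager turns that into IO_NOREUSE, and the buffer
 * cache (b_io_dismiss() below) turns IO_NOREUSE into B_NOREUSE so that
 * the pages are released to the head of the inactive queue.
 */
static int
putpages_ioflags(int pager_flags)
{
        int ioflags;

        ioflags = IO_VMIO;
        if ((pager_flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) != 0)
                ioflags |= IO_SYNC;
        else if ((pager_flags & VM_PAGER_CLUSTER_OK) == 0)
                ioflags |= IO_ASYNC;
        if ((pager_flags & VM_PAGER_PUT_INVAL) != 0)
                ioflags |= IO_INVAL;
        if ((pager_flags & VM_PAGER_PUT_NOREUSE) != 0)
                ioflags |= IO_NOREUSE;
        ioflags |= IO_SEQMAX << IO_SEQSHIFT;
        return (ioflags);
}

The filesystem's VOP_WRITE then hands each buffer, together with these ioflags, to vfs_bio_set_flags() or vfs_bio_brelse() (added in vfs_bio.c below), which set B_NOREUSE so that releasing the buffer places its pages at the head of the inactive queue rather than the tail.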

View File

@@ -1700,15 +1700,6 @@ ext2_ind_read(struct vop_read_args *ap)
break;
}
/*
* If IO_DIRECT then set B_DIRECT for the buffer. This
* will cause us to attempt to release the buffer later on
* and will cause the buffer cache to attempt to free the
* underlying pages.
*/
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
/*
* We should only get non-zero b_resid when an I/O error
* has occurred, which should cause us to break above.
@@ -1726,25 +1717,7 @@ ext2_ind_read(struct vop_read_args *ap)
(int)xfersize, uio);
if (error)
break;
if (ioflag & (IO_VMIO|IO_DIRECT)) {
/*
* If it's VMIO or direct I/O, then we don't
* need the buf, mark it available for
* freeing. If it's non-direct VMIO, the VM has
* the data.
*/
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
/*
* Otherwise let whoever
* made the request take care of
* freeing it. We just queue
* it onto another list.
*/
bqrelse(bp);
}
vfs_bio_brelse(bp, ioflag);
}
/*
@@ -1753,14 +1726,8 @@ ext2_ind_read(struct vop_read_args *ap)
* and on normal completion has not set a new value into it.
* so it must have come from a 'break' statement
*/
if (bp != NULL) {
if (ioflag & (IO_VMIO|IO_DIRECT)) {
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
bqrelse(bp);
}
}
if (bp != NULL)
vfs_bio_brelse(bp, ioflag);
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
@@ -2018,9 +1985,8 @@ ext2_write(struct vop_write_args *ap)
if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
fs->e2fs_bsize == xfersize)
vfs_bio_clrbuf(bp);
if (ioflag & (IO_VMIO|IO_DIRECT)) {
bp->b_flags |= B_RELBUF;
}
vfs_bio_set_flags(bp, ioflag);
/*
* If IO_SYNC each buffer is written synchronously. Otherwise

View File

@@ -4413,6 +4413,45 @@ vfs_bio_bzero_buf(struct buf *bp, int base, int size)
}
}
/*
 * Update buffer flags based on I/O request parameters, optionally releasing the
 * buffer. If it's VMIO or direct I/O, the buffer pages are released to the VM,
 * where they may be placed on a page queue (VMIO) or freed immediately (direct
 * I/O). Otherwise the buffer is released to the cache.
 */
static void
b_io_dismiss(struct buf *bp, int ioflag, bool release)
{

        KASSERT((ioflag & IO_NOREUSE) == 0 || (ioflag & IO_VMIO) != 0,
            ("buf %p non-VMIO noreuse", bp));

        if ((ioflag & IO_DIRECT) != 0)
                bp->b_flags |= B_DIRECT;
        if ((ioflag & (IO_VMIO | IO_DIRECT)) != 0 && LIST_EMPTY(&bp->b_dep)) {
                bp->b_flags |= B_RELBUF;
                if ((ioflag & IO_NOREUSE) != 0)
                        bp->b_flags |= B_NOREUSE;
                if (release)
                        brelse(bp);
        } else if (release)
                bqrelse(bp);
}

void
vfs_bio_brelse(struct buf *bp, int ioflag)
{

        b_io_dismiss(bp, ioflag, true);
}

void
vfs_bio_set_flags(struct buf *bp, int ioflag)
{

        b_io_dismiss(bp, ioflag, false);
}
/*
* vm_hold_load_pages and vm_hold_free_pages get pages into
* a buffers address space. The pages are anonymous and are
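As a usage illustration, here is how a read path in a filesystem backed by the buffer cache would release its buffers after this change, mirroring the simplified ffs_read()/ext2_ind_read() loops elsewhere in this commit. This is a hypothetical sketch: myfs_read_block() and MYFS_BSIZE are invented names, and locking, clustering, and read-ahead are omitted.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/ucred.h>
#include <sys/uio.h>
#include <sys/vnode.h>

#define MYFS_BSIZE      16384           /* hypothetical block size */

/*
 * Hypothetical read helper: read one logical block of "vp" and copy it
 * out to the caller via uiomove().  The point of interest is the final
 * vfs_bio_brelse() call, which replaces the old open-coded
 * B_DIRECT/B_RELBUF handling and honors IO_NOREUSE as well.
 */
static int
myfs_read_block(struct vnode *vp, daddr_t lbn, int ioflag, struct uio *uio)
{
        struct buf *bp;
        int error;

        error = bread(vp, lbn, MYFS_BSIZE, NOCRED, &bp);
        if (error != 0) {
                brelse(bp);
                return (error);
        }
        error = uiomove(bp->b_data, MYFS_BSIZE, uio);

        /*
         * For VMIO or direct I/O (and IO_NOREUSE), the buffer's pages
         * are handed back to the VM; otherwise the buffer is requeued
         * for reuse.
         */
        vfs_bio_brelse(bp, ioflag);
        return (error);
}

Write paths that still need the buffer for bawrite(), bdwrite(), or cluster_write() use vfs_bio_set_flags() instead, which applies the same flag logic without releasing the buffer.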

View File

@@ -538,9 +538,11 @@ int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, int, struct buf **);
int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
void cluster_write(struct vnode *, struct buf *, u_quad_t, int, int);
void vfs_bio_brelse(struct buf *bp, int ioflags);
void vfs_bio_bzero_buf(struct buf *bp, int base, int size);
void vfs_bio_set_valid(struct buf *, int base, int size);
void vfs_bio_clrbuf(struct buf *);
void vfs_bio_set_flags(struct buf *bp, int ioflags);
void vfs_bio_set_valid(struct buf *, int base, int size);
void vfs_busy_pages(struct buf *, int clear_modify);
void vfs_unbusy_pages(struct buf *);
int vmapbuf(struct buf *, int);

View File

@@ -307,6 +307,7 @@ struct vattr {
#define IO_INVAL 0x0040 /* invalidate after I/O */
#define IO_SYNC 0x0080 /* do I/O synchronously */
#define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */
#define IO_NOREUSE 0x0200 /* VMIO data won't be reused */
#define IO_EXT 0x0400 /* operate on external attributes */
#define IO_NORMAL 0x0800 /* operate on regular data */
#define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */

View File

@@ -600,15 +600,6 @@ ffs_read(ap)
break;
}
/*
* If IO_DIRECT then set B_DIRECT for the buffer. This
* will cause us to attempt to release the buffer later on
* and will cause the buffer cache to attempt to free the
* underlying pages.
*/
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
/*
* We should only get non-zero b_resid when an I/O error
* has occurred, which should cause us to break above.
@@ -633,25 +624,7 @@ ffs_read(ap)
if (error)
break;
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
/*
* If there are no dependencies, and it's VMIO,
* then we don't need the buf, mark it available
* for freeing. For non-direct VMIO reads, the VM
* has the data.
*/
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
/*
* Otherwise let whoever
* made the request take care of
* freeing it. We just queue
* it onto another list.
*/
bqrelse(bp);
}
vfs_bio_brelse(bp, ioflag);
}
/*
@@ -660,15 +633,8 @@ ffs_read(ap)
* and on normal completion has not set a new value into it.
* so it must have come from a 'break' statement
*/
if (bp != NULL) {
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
bqrelse(bp);
}
}
if (bp != NULL)
vfs_bio_brelse(bp, ioflag);
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
@@ -786,8 +752,6 @@ ffs_write(ap)
vnode_pager_setsize(vp, ip->i_size);
break;
}
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
bp->b_flags |= B_NOCACHE;
@@ -827,10 +791,8 @@ ffs_write(ap)
if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
fs->fs_bsize == xfersize)
vfs_bio_clrbuf(bp);
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
bp->b_flags |= B_RELBUF;
}
vfs_bio_set_flags(bp, ioflag);
/*
* If IO_SYNC each buffer is written synchronously. Otherwise
@@ -977,15 +939,6 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
break;
}
/*
* If IO_DIRECT then set B_DIRECT for the buffer. This
* will cause us to attempt to release the buffer later on
* and will cause the buffer cache to attempt to free the
* underlying pages.
*/
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
/*
* We should only get non-zero b_resid when an I/O error
* has occurred, which should cause us to break above.
@@ -1004,26 +957,7 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
(int)xfersize, uio);
if (error)
break;
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
/*
* If there are no dependencies, and it's VMIO,
* then we don't need the buf, mark it available
* for freeing. For non-direct VMIO reads, the VM
* has the data.
*/
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
/*
* Otherwise let whoever
* made the request take care of
* freeing it. We just queue
* it onto another list.
*/
bqrelse(bp);
}
vfs_bio_brelse(bp, ioflag);
}
/*
@@ -1032,15 +966,8 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
* and on normal completion has not set a new value into it.
* so it must have come from a 'break' statement
*/
if (bp != NULL) {
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
bp->b_flags |= B_RELBUF;
brelse(bp);
} else {
bqrelse(bp);
}
}
if (bp != NULL)
vfs_bio_brelse(bp, ioflag);
return (error);
}
@@ -1109,8 +1036,6 @@ ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
*/
if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
vfs_bio_clrbuf(bp);
if (ioflag & IO_DIRECT)
bp->b_flags |= B_DIRECT;
if (uio->uio_offset + xfersize > dp->di_extsize)
dp->di_extsize = uio->uio_offset + xfersize;
@@ -1121,10 +1046,8 @@ ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
error =
uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
(LIST_EMPTY(&bp->b_dep))) {
bp->b_flags |= B_RELBUF;
}
vfs_bio_set_flags(bp, ioflag);
/*
* If IO_SYNC each buffer is written synchronously. Otherwise

View File

@@ -483,8 +483,8 @@ more:
if (ib != 0 && pageout_count < vm_pageout_page_count)
goto more;
return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL,
NULL));
return (vm_pageout_flush(&mc[page_base], pageout_count,
VM_PAGER_PUT_NOREUSE, 0, NULL, NULL));
}
/*

View File

@@ -95,6 +95,7 @@ extern struct pagerops mgtdevicepagerops;
#define VM_PAGER_PUT_SYNC 0x0001
#define VM_PAGER_PUT_INVAL 0x0002
#define VM_PAGER_PUT_NOREUSE 0x0004
#define VM_PAGER_CLUSTER_OK 0x0008
#ifdef _KERNEL

View File

@@ -1280,6 +1280,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
ioflags |= IO_ASYNC;
ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
ioflags |= (flags & VM_PAGER_PUT_NOREUSE) ? IO_NOREUSE : 0;
ioflags |= IO_SEQMAX << IO_SEQSHIFT;
aiov.iov_base = (caddr_t) 0;