Detect and optimize reads from holes on UFS.

- Create a getblkx(9) variant of getblk(9) that can return an error.
- Add a GB_NOSPARSE flag for getblk()/getblkx() which requests that a
  BMAP lookup be performed before the buffer is created, and that
  EJUSTRETURN be returned if the requested block does not exist.
- Make ffs_read() use GB_NOSPARSE to avoid instantiating a buffer (and
  allocating the pages for it) when a hole is read, copying from
  zero_region instead.

The end result is fewer page allocations and less buffer recycling when
a hole is read, which is important for some benchmarks.
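
In rough outline, the optimized read path works as sketched below. This
is an illustrative condensation of the ffs_read()/ffs_read_hole()
changes in the diff that follows, not a drop-in fragment; the enclosing
per-block loop and its bookkeeping are elided:

	/*
	 * GB_NOSPARSE makes getblkx() (here reached via bread_gb()) BMAP
	 * the logical block first.  For a hole it returns EJUSTRETURN
	 * without instantiating a buffer, and the read is then satisfied
	 * by copying from zero_region.
	 */
	error = bread_gb(vp, lbn, size, NOCRED, GB_UNMAPPED | GB_NOSPARSE, &bp);
	if (error == EJUSTRETURN) {
		error = ffs_read_hole(uio, xfersize, &size);
		if (error == 0)
			continue;	/* advance to the next logical block */
	}
	if (error != 0) {
		brelse(bp);
		break;
	}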

Requested and reviewed by:	jeff
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D14917
Author:	Konstantin Belousov	2018-05-13 09:47:28 +00:00
Parent:	f1401123c5
Commit:	2ebc882927
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=333576
4 changed files with 109 additions and 43 deletions

sys/kern/vfs_bio.c

@@ -2138,30 +2138,37 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno,
     void (*ckhashfunc)(struct buf *), struct buf **bpp)
 {
 	struct buf *bp;
-	int readwait, rv;
+	struct thread *td;
+	int error, readwait, rv;
 
 	CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size);
+	td = curthread;
 	/*
-	 * Can only return NULL if GB_LOCK_NOWAIT flag is specified.
+	 * Can only return NULL if GB_LOCK_NOWAIT or GB_SPARSE flags
+	 * are specified.
 	 */
-	*bpp = bp = getblk(vp, blkno, size, 0, 0, flags);
-	if (bp == NULL)
-		return (EBUSY);
+	error = getblkx(vp, blkno, size, 0, 0, flags, &bp);
+	if (error != 0) {
+		*bpp = NULL;
+		return (error);
+	}
+	flags &= ~GB_NOSPARSE;
+	*bpp = bp;
 
 	/*
 	 * If not found in cache, do some I/O
 	 */
 	readwait = 0;
 	if ((bp->b_flags & B_CACHE) == 0) {
-		if (!TD_IS_IDLETHREAD(curthread)) {
+		if (!TD_IS_IDLETHREAD(td)) {
 #ifdef RACCT
 			if (racct_enable) {
-				PROC_LOCK(curproc);
-				racct_add_buf(curproc, bp, 0);
-				PROC_UNLOCK(curproc);
+				PROC_LOCK(td->td_proc);
+				racct_add_buf(td->td_proc, bp, 0);
+				PROC_UNLOCK(td->td_proc);
 			}
 #endif /* RACCT */
-			curthread->td_ru.ru_inblock++;
+			td->td_ru.ru_inblock++;
 		}
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
@@ -3822,8 +3829,21 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
 	}
 }
 
+struct buf *
+getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+    int flags)
+{
+	struct buf *bp;
+	int error;
+
+	error = getblkx(vp, blkno, size, slpflag, slptimeo, flags, &bp);
+	if (error != 0)
+		return (NULL);
+	return (bp);
+}
+
 /*
- *	getblk:
+ *	getblkx:
  *
  *	Get a block given a specified block and offset into a file/device.
  *	The buffers B_DONE bit will be cleared on return, making it almost
@@ -3858,12 +3878,13 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
  *	intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR
  *	prior to issuing the READ.  biodone() will *not* clear B_INVAL.
  */
-struct buf *
-getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
-    int flags)
+int
+getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+    int flags, struct buf **bpp)
 {
 	struct buf *bp;
 	struct bufobj *bo;
+	daddr_t d_blkno;
 	int bsize, error, maxsize, vmio;
 	off_t offset;
@@ -3878,6 +3899,7 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 	flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
 	bo = &vp->v_bufobj;
+	d_blkno = blkno;
 loop:
 	BO_RLOCK(bo);
 	bp = gbincore(bo, blkno);
@@ -3889,7 +3911,7 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 		 */
 		lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK;
 
-		if (flags & GB_LOCK_NOWAIT)
+		if ((flags & GB_LOCK_NOWAIT) != 0)
 			lockflags |= LK_NOWAIT;
 
 		error = BUF_TIMELOCK(bp, lockflags,
@@ -3902,8 +3924,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 		if (error == ENOLCK)
 			goto loop;
 		/* We timed out or were interrupted. */
-		else if (error)
-			return (NULL);
+		else if (error != 0)
+			return (error);
 		/* If recursed, assume caller knows the rules. */
 		else if (BUF_LOCKRECURSED(bp))
 			goto end;
@@ -4008,10 +4030,10 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 		 * here.
 		 */
 		if (flags & GB_NOCREAT)
-			return NULL;
+			return (EEXIST);
 		if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
 		    TD_IS_IDLETHREAD(curthread))
-			return NULL;
+			return (EBUSY);
 
 		bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize;
 		KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize"));
@@ -4025,11 +4047,22 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 			flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
 		}
 		maxsize = imax(maxsize, bsize);
+		if ((flags & GB_NOSPARSE) != 0 && vmio &&
+		    !vn_isdisk(vp, NULL)) {
+			error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0);
+			KASSERT(error != EOPNOTSUPP,
+			    ("GB_NOSPARSE from fs not supporting bmap, vp %p",
+			    vp));
+			if (error != 0)
+				return (error);
+			if (d_blkno == -1)
+				return (EJUSTRETURN);
+		}
 
 		bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags);
 		if (bp == NULL) {
 			if (slpflag || slptimeo)
-				return NULL;
+				return (ETIMEDOUT);
 			/*
 			 * XXX This is here until the sleep path is diagnosed
 			 * enough to work under very low memory conditions.
@@ -4075,7 +4108,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 		 * Insert the buffer into the hash, so that it can
 		 * be found by incore.
 		 */
-		bp->b_blkno = bp->b_lblkno = blkno;
+		bp->b_lblkno = blkno;
+		bp->b_blkno = d_blkno;
 		bp->b_offset = offset;
 		bgetvp(vp, bp);
 		BO_UNLOCK(bo);
@@ -4110,7 +4144,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 	buf_track(bp, __func__);
 	KASSERT(bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
-	return (bp);
+	*bpp = bp;
+	return (0);
 }
 
 /*

sys/kern/vfs_cluster.c

@@ -94,12 +94,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 {
 	struct buf *bp, *rbp, *reqbp;
 	struct bufobj *bo;
+	struct thread *td;
 	daddr_t blkno, origblkno;
 	int maxra, racluster;
 	int error, ncontig;
 	int i;
 
 	error = 0;
+	td = curthread;
 	bo = &vp->v_bufobj;
 	if (!unmapped_buf_allowed)
 		gbflags &= ~GB_UNMAPPED;
@@ -118,10 +120,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 	/*
 	 * get the requested block
 	 */
-	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
-	if (bp == NULL)
-		return (EBUSY);
+	error = getblkx(vp, lblkno, size, 0, 0, gbflags, &bp);
+	if (error != 0) {
+		*bpp = NULL;
+		return (error);
+	}
+	gbflags &= ~GB_NOSPARSE;
 	origblkno = lblkno;
+	*bpp = reqbp = bp;
 
 	/*
 	 * if it is in the cache, then check to see if the reads have been
@@ -243,12 +249,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 		bstrategy(bp);
 #ifdef RACCT
 		if (racct_enable) {
-			PROC_LOCK(curproc);
-			racct_add_buf(curproc, bp, 0);
-			PROC_UNLOCK(curproc);
+			PROC_LOCK(td->td_proc);
+			racct_add_buf(td->td_proc, bp, 0);
+			PROC_UNLOCK(td->td_proc);
 		}
 #endif /* RACCT */
-		curthread->td_ru.ru_inblock++;
+		td->td_ru.ru_inblock++;
 	}
 
 	/*
@@ -303,12 +309,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 		bstrategy(rbp);
 #ifdef RACCT
 		if (racct_enable) {
-			PROC_LOCK(curproc);
-			racct_add_buf(curproc, rbp, 0);
-			PROC_UNLOCK(curproc);
+			PROC_LOCK(td->td_proc);
+			racct_add_buf(td->td_proc, rbp, 0);
+			PROC_UNLOCK(td->td_proc);
 		}
 #endif /* RACCT */
-		curthread->td_ru.ru_inblock++;
+		td->td_ru.ru_inblock++;
 	}
 
 	if (reqbp) {

sys/sys/buf.h

@@ -479,6 +479,7 @@ buf_track(struct buf *bp, const char *location)
 #define	GB_UNMAPPED	0x0008		/* Do not mmap buffer pages. */
 #define	GB_KVAALLOC	0x0010		/* But allocate KVA. */
 #define	GB_CKHASH	0x0020		/* If reading, calc checksum hash */
+#define	GB_NOSPARSE	0x0040		/* Do not instantiate holes */
 
 #ifdef _KERNEL
 extern int	nbuf;			/* The number of buffer headers */
@@ -540,6 +541,8 @@ struct buf *getpbuf(int *);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
+int	getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag,
+	    int slptimeo, int flags, struct buf **bpp);
 struct buf *geteblk(int, int);
 int	bufwait(struct buf *);
 int	bufwrite(struct buf *);

sys/ufs/ffs/ffs_vnops.c

@@ -462,6 +462,26 @@ ffs_lock(ap)
 #endif
 }
 
+static int
+ffs_read_hole(struct uio *uio, long xfersize, long *size)
+{
+	ssize_t saved_resid, tlen;
+	int error;
+
+	while (xfersize > 0) {
+		tlen = min(xfersize, ZERO_REGION_SIZE);
+		saved_resid = uio->uio_resid;
+		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
+		    tlen, uio);
+		if (error != 0)
+			return (error);
+		tlen = saved_resid - uio->uio_resid;
+		xfersize -= tlen;
+		*size -= tlen;
+	}
+	return (0);
+}
+
 /*
  * Vnode op for reading.
  */
@@ -483,9 +503,7 @@ ffs_read(ap)
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
 	ssize_t orig_resid;
-	int error;
-	int seqcount;
-	int ioflag;
+	int bflag, error, ioflag, seqcount;
 
 	vp = ap->a_vp;
 	uio = ap->a_uio;
@@ -529,6 +547,7 @@ ffs_read(ap)
 	    uio->uio_offset >= fs->fs_maxfilesize)
 		return (EOVERFLOW);
 
+	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
@@ -565,8 +584,7 @@ ffs_read(ap)
 			/*
 			 * Don't do readahead if this is the end of the file.
 			 */
-			error = bread_gb(vp, lbn, size, NOCRED,
-			    GB_UNMAPPED, &bp);
+			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			/*
 			 * Otherwise if we are allowed to cluster,
@@ -577,7 +595,7 @@ ffs_read(ap)
 			 */
 			error = cluster_read(vp, ip->i_size, lbn,
 			    size, NOCRED, blkoffset + uio->uio_resid,
-			    seqcount, GB_UNMAPPED, &bp);
+			    seqcount, bflag, &bp);
 		} else if (seqcount > 1) {
 			/*
 			 * If we are NOT allowed to cluster, then
@@ -589,17 +607,21 @@ ffs_read(ap)
 			 */
 			u_int nextsize = blksize(fs, ip, nextlbn);
 			error = breadn_flags(vp, lbn, size, &nextlbn,
-			    &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp);
+			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
 		} else {
 			/*
 			 * Failing all of the above, just read what the
 			 * user asked for. Interestingly, the same as
 			 * the first option above.
 			 */
-			error = bread_gb(vp, lbn, size, NOCRED,
-			    GB_UNMAPPED, &bp);
+			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
 		}
-		if (error) {
+		if (error == EJUSTRETURN) {
+			error = ffs_read_hole(uio, xfersize, &size);
+			if (error == 0)
+				continue;
+		}
+		if (error != 0) {
 			brelse(bp);
 			bp = NULL;
 			break;