Optimize lseek(SEEK_DATA) on UFS.

The old implementation, at the VFS layer, would map the entire range of
logical blocks between the starting offset and the first data block
following that offset.  With large sparse files this is very
inefficient.  The VFS does not yet provide an interface that would let a
filesystem improve on this generic implementation.

Add ufs_bmap_seekdata(), which uses the obvious algorithm of scanning
indirect blocks to look for data blocks.  Use it instead of
vn_bmap_seekhole() to implement SEEK_DATA.

Reviewed by:	kib, mckusick
MFC after:	2 weeks
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D19598
This commit is contained in:
markj 2019-03-17 17:34:06 +00:00
parent ad625eb486
commit 3139ccc80b
3 changed files with 141 additions and 30 deletions

View File

@ -90,6 +90,51 @@ ufs_bmap(ap)
return (error);
}
/*
 * Read the indirect block with logical block number lbn and block pointer
 * daddr into a buffer, returned via *bpp.  The buffer comes from the buffer
 * cache; a disk read is issued only when the block is not already cached.
 * On success the buffer is returned locked to the caller; on error it is
 * released here.  Returns 0 or an error from bufwait().
 */
static int
ufs_readindir(vp, lbn, daddr, bpp)
struct vnode *vp;
ufs_lbn_t lbn;
ufs2_daddr_t daddr;
struct buf **bpp;
{
struct buf *bp;
struct mount *mp;
struct ufsmount *ump;
int error;
mp = vp->v_mount;
ump = VFSTOUFS(mp);
/* Indirect blocks are filesystem-block sized (f_iosize). */
bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0);
if ((bp->b_flags & B_CACHE) == 0) {
/* A zero daddr means the caller expected the block to be cached. */
KASSERT(daddr != 0,
("ufs_readindir: indirect block not in cache"));
/* Not cached: set up and issue a read from daddr. */
bp->b_blkno = blkptrtodb(ump, daddr);
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
vfs_busy_pages(bp, 0);
bp->b_iooffset = dbtob(bp->b_blkno);
bstrategy(bp);
#ifdef RACCT
/* Charge the read to the current process's resource accounting. */
if (racct_enable) {
PROC_LOCK(curproc);
racct_add_buf(curproc, bp, 0);
PROC_UNLOCK(curproc);
}
#endif
curthread->td_ru.ru_inblock++;
/* Wait for the read to complete; release the buffer on failure. */
error = bufwait(bp);
if (error != 0) {
brelse(bp);
return (error);
}
}
*bpp = bp;
return (0);
}
/*
* Indirect blocks are now on the vnode for the file. They are given negative
* logical block numbers. Indirect blocks are addressed by the negative
@ -212,34 +257,9 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
*/
if (bp)
bqrelse(bp);
bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0);
if ((bp->b_flags & B_CACHE) == 0) {
#ifdef INVARIANTS
if (!daddr)
panic("ufs_bmaparray: indirect block not in cache");
#endif
bp->b_blkno = blkptrtodb(ump, daddr);
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
vfs_busy_pages(bp, 0);
bp->b_iooffset = dbtob(bp->b_blkno);
bstrategy(bp);
#ifdef RACCT
if (racct_enable) {
PROC_LOCK(curproc);
racct_add_buf(curproc, bp, 0);
PROC_UNLOCK(curproc);
}
#endif /* RACCT */
curthread->td_ru.ru_inblock++;
error = bufwait(bp);
if (error) {
brelse(bp);
return (error);
}
}
error = ufs_readindir(vp, metalbn, daddr, &bp);
if (error != 0)
return (error);
if (I_IS_UFS1(ip)) {
daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off];
@ -303,6 +323,93 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
return (0);
}
int
ufs_bmap_seekdata(vp, offp)
struct vnode *vp;
off_t *offp;
{
struct buf *bp;
struct indir a[UFS_NIADDR + 1], *ap;
struct inode *ip;
struct mount *mp;
struct ufsmount *ump;
ufs2_daddr_t blockcnt, bn, daddr;
uint64_t bsize;
off_t numblks;
int error, num, num1;
bp = NULL;
ip = VTOI(vp);
mp = vp->v_mount;
ump = VFSTOUFS(mp);
if (vp->v_type != VREG || (ip->i_flags & SF_SNAPSHOT) != 0)
return (EINVAL);
if (*offp < 0)
return (ENXIO);
bsize = mp->mnt_stat.f_iosize;
for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
bn < numblks;) {
if (bn < UFS_NDADDR) {
daddr = DIP(ip, i_db[bn]);
if (daddr != 0)
break;
bn++;
continue;
}
ap = a;
error = ufs_getlbns(vp, bn, ap, &num);
if (error != 0)
break;
MPASS(num >= 2);
daddr = DIP(ip, i_ib[ap->in_off]);
ap++, num--;
if (daddr == 0) {
for (blockcnt = 1; num > 0; num--)
blockcnt *= MNINDIR(ump);
bn += blockcnt;
continue;
}
for (; num > 0 && daddr != 0; ap++, num--) {
if (bp != NULL)
bqrelse(bp);
error = ufs_readindir(vp, ap->in_lbn, daddr, &bp);
if (error != 0)
return (error);
/*
* Precompute the number of blocks addressed by an entry
* of the current indirect block.
*/
for (blockcnt = 1, num1 = num; num1 > 1; num1--)
blockcnt *= MNINDIR(ump);
for (; ap->in_off < MNINDIR(ump); ap->in_off++,
bn += blockcnt) {
daddr = I_IS_UFS1(ip) ?
((ufs1_daddr_t *)bp->b_data)[ap->in_off] :
((ufs2_daddr_t *)bp->b_data)[ap->in_off];
if (daddr != 0)
break;
}
}
if (daddr != 0) {
MPASS(num == 0);
break;
}
}
if (bp != NULL)
bqrelse(bp);
if (bn >= numblks)
error = ENXIO;
if (error == 0)
*offp = bn * bsize;
return (error);
}
/*
* Create an array of logical block number/offset pairs which represent the
* path of indirect blocks required to access a data block. The first "pair"

View File

@ -58,6 +58,7 @@ extern struct vop_vector ufs_vnodeops;
int ufs_bmap(struct vop_bmap_args *);
int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
struct buf *, int *, int *);
int ufs_bmap_seekdata(struct vnode *, off_t *);
int ufs_fhtovp(struct mount *, struct ufid *, int, struct vnode **);
int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
void ufs_dirbad(struct inode *, doff_t, char *);

View File

@ -2701,12 +2701,15 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
/*
 * Handle UFS-specific ioctls: FIOSEEKDATA uses the indirect-block scan in
 * ufs_bmap_seekdata(); FIOSEEKHOLE falls back to the generic
 * vn_bmap_seekhole().  All other commands return ENOTTY.
 *
 * NOTE: the rendered diff hunk contained both the pre- and post-change
 * FIOSEEKHOLE return statements; only the post-change form (using the
 * local vp) is kept here, and the function's closing brace is restored.
 */
static int
ufs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	switch (ap->a_command) {
	case FIOSEEKDATA:
		return (ufs_bmap_seekdata(vp, (off_t *)ap->a_data));
	case FIOSEEKHOLE:
		return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data,
		    ap->a_cred));
	default:
		return (ENOTTY);
	}
}