Optimize lseek(SEEK_DATA) on UFS.

This version fixes the problems identified in r345244.

Reviewed by:	kib
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D19598
This commit is contained in:
markj 2019-04-29 22:05:26 +00:00
parent bfb9c17e43
commit a7a455c299
3 changed files with 162 additions and 30 deletions

View File

@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$");
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
static ufs_lbn_t lbn_count(struct ufsmount *, int);
static int readindir(struct vnode *, ufs_lbn_t, ufs2_daddr_t, struct buf **);
/*
* Bmap converts the logical block number of a file to its physical block
* number on the disk. The conversion is done by using the logical block
@ -90,6 +93,51 @@ ufs_bmap(ap)
return (error);
}
static int
readindir(vp, lbn, daddr, bpp)
struct vnode *vp;
ufs_lbn_t lbn;
ufs2_daddr_t daddr;
struct buf **bpp;
{
struct buf *bp;
struct mount *mp;
struct ufsmount *ump;
int error;
mp = vp->v_mount;
ump = VFSTOUFS(mp);
bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0);
if ((bp->b_flags & B_CACHE) == 0) {
KASSERT(daddr != 0,
("readindir: indirect block not in cache"));
bp->b_blkno = blkptrtodb(ump, daddr);
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
vfs_busy_pages(bp, 0);
bp->b_iooffset = dbtob(bp->b_blkno);
bstrategy(bp);
#ifdef RACCT
if (racct_enable) {
PROC_LOCK(curproc);
racct_add_buf(curproc, bp, 0);
PROC_UNLOCK(curproc);
}
#endif
curthread->td_ru.ru_inblock++;
error = bufwait(bp);
if (error != 0) {
brelse(bp);
return (error);
}
}
*bpp = bp;
return (0);
}
/*
* Indirect blocks are now on the vnode for the file. They are given negative
* logical block numbers. Indirect blocks are addressed by the negative
@ -212,34 +260,9 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
*/
if (bp)
bqrelse(bp);
bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0, 0);
if ((bp->b_flags & B_CACHE) == 0) {
#ifdef INVARIANTS
if (!daddr)
panic("ufs_bmaparray: indirect block not in cache");
#endif
bp->b_blkno = blkptrtodb(ump, daddr);
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
vfs_busy_pages(bp, 0);
bp->b_iooffset = dbtob(bp->b_blkno);
bstrategy(bp);
#ifdef RACCT
if (racct_enable) {
PROC_LOCK(curproc);
racct_add_buf(curproc, bp, 0);
PROC_UNLOCK(curproc);
}
#endif /* RACCT */
curthread->td_ru.ru_inblock++;
error = bufwait(bp);
if (error) {
brelse(bp);
error = readindir(vp, metalbn, daddr, &bp);
if (error != 0)
return (error);
}
}
if (I_IS_UFS1(ip)) {
daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off];
@ -303,6 +326,111 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
return (0);
}
static ufs_lbn_t
lbn_count(ump, level)
struct ufsmount *ump;
int level;
{
ufs_lbn_t blockcnt;
for (blockcnt = 1; level > 0; level--)
blockcnt *= MNINDIR(ump);
return (blockcnt);
}
int
ufs_bmap_seekdata(vp, offp)
struct vnode *vp;
off_t *offp;
{
struct buf *bp;
struct indir a[UFS_NIADDR + 1], *ap;
struct inode *ip;
struct mount *mp;
struct ufsmount *ump;
ufs2_daddr_t bn, daddr, nextbn;
uint64_t bsize;
off_t numblks;
int error, num, num1, off;
bp = NULL;
ip = VTOI(vp);
mp = vp->v_mount;
ump = VFSTOUFS(mp);
if (vp->v_type != VREG || (ip->i_flags & SF_SNAPSHOT) != 0)
return (EINVAL);
if (*offp < 0 || *offp >= ip->i_size)
return (ENXIO);
bsize = mp->mnt_stat.f_iosize;
for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
bn < numblks; bn = nextbn) {
if (bn < UFS_NDADDR) {
daddr = DIP(ip, i_db[bn]);
if (daddr != 0)
break;
nextbn = bn + 1;
continue;
}
ap = a;
error = ufs_getlbns(vp, bn, ap, &num);
if (error != 0)
break;
MPASS(num >= 2);
daddr = DIP(ip, i_ib[ap->in_off]);
ap++, num--;
for (nextbn = UFS_NDADDR, num1 = num - 1; num1 > 0; num1--)
nextbn += lbn_count(ump, num1);
if (daddr == 0) {
nextbn += lbn_count(ump, num);
continue;
}
for (; daddr != 0 && num > 0; ap++, num--) {
if (bp != NULL)
bqrelse(bp);
error = readindir(vp, ap->in_lbn, daddr, &bp);
if (error != 0)
return (error);
/*
* Scan the indirect block until we find a non-zero
* pointer.
*/
off = ap->in_off;
do {
daddr = I_IS_UFS1(ip) ?
((ufs1_daddr_t *)bp->b_data)[off] :
((ufs2_daddr_t *)bp->b_data)[off];
} while (daddr == 0 && ++off < MNINDIR(ump));
nextbn += off * lbn_count(ump, num - 1);
/*
* We need to recompute the LBNs of indirect
* blocks, so restart with the updated block offset.
*/
if (off != ap->in_off)
break;
}
if (num == 0) {
/*
* We found a data block.
*/
bn = nextbn;
break;
}
}
if (bp != NULL)
bqrelse(bp);
if (bn >= numblks)
error = ENXIO;
if (error == 0 && *offp < bn * bsize)
*offp = bn * bsize;
return (error);
}
/*
* Create an array of logical block number/offset pairs which represent the
* path of indirect blocks required to access a data block. The first "pair"

View File

@ -58,6 +58,7 @@ extern struct vop_vector ufs_vnodeops;
int ufs_bmap(struct vop_bmap_args *);
int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
struct buf *, int *, int *);
int ufs_bmap_seekdata(struct vnode *, off_t *);
int ufs_fhtovp(struct mount *, struct ufid *, int, struct vnode **);
int ufs_checkpath(ino_t, ino_t, struct inode *, struct ucred *, ino_t *);
void ufs_dirbad(struct inode *, doff_t, char *);

View File

@ -2701,12 +2701,15 @@ bad:
static int
ufs_ioctl(struct vop_ioctl_args *ap)
{
struct vnode *vp;
vp = ap->a_vp;
switch (ap->a_command) {
case FIOSEEKDATA:
return (ufs_bmap_seekdata(vp, (off_t *)ap->a_data));
case FIOSEEKHOLE:
return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
(off_t *)ap->a_data, ap->a_cred));
return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data,
ap->a_cred));
default:
return (ENOTTY);
}