Implement vfs clustering for msdosfs.

This gives a very large speedup for small block sizes (in my tests,
about 5 times for write and 3 times for read with a block size of 512,
if clustering is possible) and a moderate speedup for the moderately
large block sizes that should be used on non-small media (4K is the
best size in most cases, and the speedup for that is about 1.3 times
for write and 1.2 times for read).  mmap() should benefit from clustering
like read()/write(), but the current implementation of vm only supports
clustering (at least for getpages) if the fs block size is >= PAGE_SIZE.

msdosfs is now only slightly slower than ffs with soft updates for
writing and slightly faster for reading when both use their best block
sizes.  Writing is slower for msdosfs because of more sync writes.
Reading is faster for msdosfs because indirect blocks interfere with
clustering in ffs.

The changes in msdosfs_read() and msdosfs_write() are simpler merges
of corresponding code in ffs (after fixing some style bugs in ffs).
msdosfs_bmap() needs fs-specific code.  This implementation loops
calling a lower level bmap function to do the hard parts.  This is a
bit inefficient, but is efficient enough since msdosfs_bmap() is only
called when there is physical i/o to do.

Approved by:	re (hrs)
This commit is contained in:
bde 2007-07-20 17:06:57 +00:00
parent b2bdcce9e1
commit 8fdd1a79d0

View File

@ -598,6 +598,9 @@ msdosfs_read(ap)
error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
error = bread(vp, lbn, blsize, NOCRED, &bp);
} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
NOCRED, on + uio->uio_resid, seqcount, &bp);
} else if (seqcount > 1) {
rasize = blsize;
error = breadn(vp, lbn,
@ -644,6 +647,7 @@ msdosfs_write(ap)
u_long osize;
int error = 0;
u_long count;
int seqcount;
daddr_t bn, lastcn;
struct buf *bp;
int ioflag = ap->a_ioflag;
@ -729,6 +733,7 @@ msdosfs_write(ap)
} else
lastcn = de_clcount(pmp, osize) - 1;
seqcount = ioflag >> IO_SEQSHIFT;
do {
if (de_cluster(pmp, uio->uio_offset) > lastcn) {
error = ENOSPC;
@ -797,19 +802,31 @@ msdosfs_write(ap)
break;
}
/* Prepare for clustered writes in some else clauses. */
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
bp->b_flags |= B_CLUSTEROK;
/*
* If IO_SYNC, then each buffer is written synchronously.
* Otherwise, if on a
* Otherwise, if we have a severe page deficiency then
* write the buffer asynchronously. Otherwise, if on a
* cluster boundary then write the buffer asynchronously,
* since we don't expect more writes into this
* combining it with contiguous clusters if permitted and
* possible, since we don't expect more writes into this
* buffer soon. Otherwise, do a delayed write because we
* expect more writes into this buffer soon.
*/
if (ioflag & IO_SYNC)
(void)bwrite(bp);
else if (n + croffset == pmp->pm_bpcluster)
else if (vm_page_count_severe() || buf_dirty_count_severe())
bawrite(bp);
else
else if (n + croffset == pmp->pm_bpcluster) {
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
cluster_write(vp, bp, dep->de_FileSize,
seqcount);
else
bawrite(bp);
} else
bdwrite(bp);
dep->de_flag |= DE_UPDATE;
} while (error == 0 && uio->uio_resid > 0);
@ -1778,10 +1795,12 @@ msdosfs_bmap(ap)
} */ *ap;
{
struct denode *dep;
struct mount *mp;
struct msdosfsmount *pmp;
struct vnode *vp;
daddr_t runbn;
u_long cn;
int error;
int bnpercn, error, maxio, maxrun, run;
vp = ap->a_vp;
dep = VTODE(vp);
@ -1798,7 +1817,31 @@ msdosfs_bmap(ap)
if (cn != ap->a_bn)
return (EFBIG);
error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
return (error);
if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
return (error);
mp = vp->v_mount;
maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
bnpercn = de_cn2bn(pmp, 1);
if (ap->a_runp != NULL) {
maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
for (run = 1; run <= maxrun; run++) {
if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
runbn != *ap->a_bnp + run * bnpercn)
break;
}
*ap->a_runp = run - 1;
}
if (ap->a_runb != NULL) {
maxrun = ulmin(maxio - 1, cn);
for (run = 1; run < maxrun; run++) {
if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
runbn != *ap->a_bnp - run * bnpercn)
break;
}
*ap->a_runb = run - 1;
}
return (0);
}
static int