Fix a file-rewrite performance case for UFS[2]. When rewriting portions

of a file in chunks that are less than the filesystem block size, if the
data is not already cached, the system will perform a read-before-write.
The problem is that it does this on a block-by-block basis, breaking up the
I/Os and making clustering impossible for the writes.  Programs such
as INN using cyclic file buffers suffer greatly.  This problem is only going
to get worse as we use larger and larger filesystem block sizes.

The solution is to extend the sequential heuristic so UFS[2] can perform
a far larger read and readahead when dealing with this case.

(note: maximum disk write bandwidth is 27MB/sec through the filesystem)
(note: filesystem blocksize in test is 8K (1K frag))
dd if=/dev/zero of=test.dat bs=1k count=2m conv=notrunc

Before:  (note half of these are reads)
      tty             da0              da1             acd0             cpu
 tin tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
   0   76 14.21 598  8.30   0.00   0  0.00   0.00   0  0.00   0  0  7  1 92
   0   76 14.09 813 11.19   0.00   0  0.00   0.00   0  0.00   0  0  9  5 86
   0   76 14.28 821 11.45   0.00   0  0.00   0.00   0  0.00   0  0  8  1 91

After:	(note half of these are reads)
      tty             da0              da1             acd0             cpu
 tin tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
   0   76 63.62 434 26.99   0.00   0  0.00   0.00   0  0.00   0  0 18  1 80
   0   76 63.58 424 26.30   0.00   0  0.00   0.00   0  0.00   0  0 17  2 82
   0   76 63.82 438 27.32   0.00   0  0.00   0.00   0  0.00   1  0 19  2 79

Reviewed by:	mckusick
Approved by:	re
X-MFC after:	immediately (was heavily tested in -stable for 4 months)
This commit is contained in:
Matthew Dillon 2002-10-18 22:52:41 +00:00
parent 76ba4ecdad
commit 1b7e3dafdf
3 changed files with 36 additions and 7 deletions

View File

@ -341,7 +341,14 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
}
brelse(bp);
if (flags & BA_CLRBUF) {
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
MAXBSIZE, seqcount, &nbp);
} else {
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
}
if (error) {
brelse(nbp);
goto fail;
@ -788,8 +795,21 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
return (0);
}
brelse(bp);
/*
* If requested clear invalid portions of the buffer. If we
* have to do a read-before-write (typical if BA_CLRBUF is set),
* try to do some read-ahead in the sequential case to reduce
* the number of I/O transactions.
*/
if (flags & BA_CLRBUF) {
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
error = cluster_read(vp, ip->i_size, lbn,
(int)fs->fs_bsize, NOCRED,
MAXBSIZE, seqcount, &nbp);
} else {
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
}
if (error) {
brelse(nbp);
goto fail;

View File

@ -749,9 +749,12 @@ ffs_write(ap)
resid = uio->uio_resid;
osize = ip->i_size;
flags = 0;
if (seqcount > BA_SEQMAX)
flags = BA_SEQMAX << BA_SEQSHIFT;
else
flags = seqcount << BA_SEQSHIFT;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
flags = IO_SYNC;
flags |= IO_SYNC;
#ifdef ENABLE_VFS_IOOPT
if (object && (object->flags & OBJ_OPT)) {

View File

@ -110,10 +110,16 @@ void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
/*
* Flags to low-level allocation routines.
* The low 16-bits are reserved for IO_ flags from vnode.h.
* Flags to low-level allocation routines. The low 16-bits are reserved
* for IO_ flags from vnode.h.
*
* Note: The general vfs code typically limits the sequential heuristic
* count to 127. See sequential_heuristic() in kern/vfs_vnops.c
*/
#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */
#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
#define BA_SEQSHIFT 24
#define BA_SEQMAX 0x7F
#endif /* !_UFS_UFS_EXTERN_H_ */