Fix a file-rewrite performance case for UFS[2]. When rewriting portions
of a file in chunks that are less than the filesystem block size, if the
data is not already cached the system will perform a read-before-write.
The problem is that it does this on a block-by-block basis, breaking up
the I/Os and making clustering impossible for the writes. Programs such
as INN using cyclic file buffers suffer greatly. This problem is only
going to get worse as we use larger and larger filesystem block sizes.

The solution is to extend the sequential heuristic so UFS[2] can perform
a far larger read and readahead when dealing with this case.

(note: maximum disk write bandwidth is 27MB/sec through filesystem)
(note: filesystem blocksize in test is 8K (1K frag))

dd if=/dev/zero of=test.dat bs=1k count=2m conv=notrunc

Before: (note half of these are reads)
      tty             da0              da1             acd0             cpu
 tin tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
   0   76 14.21 598  8.30   0.00   0  0.00   0.00   0  0.00   0  0  7  1 92
   0   76 14.09 813 11.19   0.00   0  0.00   0.00   0  0.00   0  0  9  5 86
   0   76 14.28 821 11.45   0.00   0  0.00   0.00   0  0.00   0  0  8  1 91

After: (note half of these are reads)
      tty             da0              da1             acd0             cpu
 tin tout  KB/t tps  MB/s   KB/t tps  MB/s   KB/t tps  MB/s  us ni sy in id
   0   76 63.62 434 26.99   0.00   0  0.00   0.00   0  0.00   0  0 18  1 80
   0   76 63.58 424 26.30   0.00   0  0.00   0.00   0  0.00   0  0 17  2 82
   0   76 63.82 438 27.32   0.00   0  0.00   0.00   0  0.00   1  0 19  2 79

Reviewed by: mckusick
Approved by: re
X-MFC after: immediately (was heavily tested in -stable for 4 months)
This commit is contained in:
parent
76ba4ecdad
commit
1b7e3dafdf
@ -341,7 +341,14 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
|
||||
}
|
||||
brelse(bp);
|
||||
if (flags & BA_CLRBUF) {
|
||||
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
|
||||
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
|
||||
if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
|
||||
error = cluster_read(vp, ip->i_size, lbn,
|
||||
(int)fs->fs_bsize, NOCRED,
|
||||
MAXBSIZE, seqcount, &nbp);
|
||||
} else {
|
||||
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
|
||||
}
|
||||
if (error) {
|
||||
brelse(nbp);
|
||||
goto fail;
|
||||
@ -788,8 +795,21 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
|
||||
return (0);
|
||||
}
|
||||
brelse(bp);
|
||||
/*
|
||||
* If requested clear invalid portions of the buffer. If we
|
||||
* have to do a read-before-write (typical if BA_CLRBUF is set),
|
||||
* try to do some read-ahead in the sequential case to reduce
|
||||
* the number of I/O transactions.
|
||||
*/
|
||||
if (flags & BA_CLRBUF) {
|
||||
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
|
||||
int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
|
||||
if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
|
||||
error = cluster_read(vp, ip->i_size, lbn,
|
||||
(int)fs->fs_bsize, NOCRED,
|
||||
MAXBSIZE, seqcount, &nbp);
|
||||
} else {
|
||||
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
|
||||
}
|
||||
if (error) {
|
||||
brelse(nbp);
|
||||
goto fail;
|
||||
|
@ -749,9 +749,12 @@ ffs_write(ap)
|
||||
|
||||
resid = uio->uio_resid;
|
||||
osize = ip->i_size;
|
||||
flags = 0;
|
||||
if (seqcount > BA_SEQMAX)
|
||||
flags = BA_SEQMAX << BA_SEQSHIFT;
|
||||
else
|
||||
flags = seqcount << BA_SEQSHIFT;
|
||||
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
|
||||
flags = IO_SYNC;
|
||||
flags |= IO_SYNC;
|
||||
|
||||
#ifdef ENABLE_VFS_IOOPT
|
||||
if (object && (object->flags & OBJ_OPT)) {
|
||||
|
@ -110,10 +110,16 @@ void softdep_releasefile(struct inode *);
|
||||
int softdep_slowdown(struct vnode *);
|
||||
|
||||
/*
|
||||
* Flags to low-level allocation routines.
|
||||
* The low 16-bits are reserved for IO_ flags from vnode.h.
|
||||
* Flags to low-level allocation routines. The low 16-bits are reserved
|
||||
* for IO_ flags from vnode.h.
|
||||
*
|
||||
* Note: The general vfs code typically limits the sequential heuristic
|
||||
* count to 127. See sequential_heuristic() in kern/vfs_vnops.c
|
||||
*/
|
||||
#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */
|
||||
#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
|
||||
#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
|
||||
#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
|
||||
#define BA_SEQSHIFT 24
|
||||
#define BA_SEQMAX 0x7F
|
||||
|
||||
#endif /* !_UFS_UFS_EXTERN_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user