General fixes to the vfs clustering code:

1) Make the cluster buffer list a non-malloced chain.  This eliminates
yet another 'evil' M_WAITOK and generally cleans up the code.
2) Fix write clustering for ext2fs; it was simply broken.  Also, ffs
clustering had an efficiency problem: more bawrites were happening
than should have been.
3) Make changes to buf.h to support the above, plus remove b_pfcent
at the request of David Greenman.
Reviewed by: davidg (partially)
John Dyson 1995-11-19 19:54:31 +00:00
parent 43849ddc7b
commit 5fe17eeb8a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=12404
4 changed files with 99 additions and 168 deletions
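For context, the heart of change (1) shows up in the vfs_cluster.c and buf.h diffs below: the malloc'd cluster_save header-and-array is replaced by a TAILQ threaded through the buffers themselves. Here is a minimal standalone sketch of that pattern; the union and the list operations match the diff, while struct buf is reduced to just the fields needed to demonstrate it:

#include <sys/queue.h>
#include <stdio.h>

struct buf {
	int b_lblkno;				/* logical block number */
	union cluster_info {
		TAILQ_HEAD(cluster_list_head, buf) cluster_head;
		TAILQ_ENTRY(buf) cluster_entry;
	} b_cluster;
};

int
main(void)
{
	struct buf head, b1, b2, *tbp, *nbp;

	b1.b_lblkno = 10;
	b2.b_lblkno = 11;

	/* The anonymous cluster buffer carries the list head... */
	TAILQ_INIT(&head.b_cluster.cluster_head);
	/* ...and each component buffer is linked through its entry. */
	TAILQ_INSERT_TAIL(&head.b_cluster.cluster_head, &b1,
	    b_cluster.cluster_entry);
	TAILQ_INSERT_TAIL(&head.b_cluster.cluster_head, &b2,
	    b_cluster.cluster_entry);

	/*
	 * Walk the chain the way the new cluster_callback() does,
	 * saving the next pointer before handing each buffer off.
	 */
	for (tbp = head.b_cluster.cluster_head.tqh_first; tbp; tbp = nbp) {
		nbp = tbp->b_cluster.cluster_entry.tqe_next;
		printf("component block %d\n", tbp->b_lblkno);
	}
	return (0);
}

Because the linkage is embedded in each buf, assembling a cluster allocates nothing, which is exactly what removes the M_WAITOK.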

sys/kern/vfs_bio.c

@@ -18,7 +18,7 @@
* 5. Modifications may be freely made to this file if the above conditions
* are met.
*
* $Id: vfs_bio.c,v 1.69 1995/11/05 20:45:49 dyson Exp $
* $Id: vfs_bio.c,v 1.70 1995/11/18 23:33:48 dyson Exp $
*/
/*
@@ -102,11 +102,6 @@ vm_offset_t bogus_offset;
int bufspace, maxbufspace;
/*
* advisory minimum for size of LRU queue or VMIO queue
*/
int minbuf;
struct bufhashhdr bufhashtbl[BUFHSZ], invalhash;
struct bqueues bufqueues[BUFFER_QUEUES];
@@ -151,7 +146,6 @@ bufinit()
* cache is still the same as it would be for 8K filesystems. This
* keeps the size of the buffer cache "in check" for big block filesystems.
*/
minbuf = nbuf / 3;
maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE;
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
@@ -404,7 +398,6 @@ brelse(struct buf * bp)
bp->b_flags &= ~(B_WANTED | B_AGE);
wakeup(bp);
} else if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_WANTED;
wakeup(bp);
}
if (bp->b_flags & B_LOCKED)
@@ -616,19 +609,18 @@ vfs_bio_awrite(struct buf * bp)
* this is a possible cluster write
*/
if (ncl != 1) {
bremfree(bp);
cluster_wbuild(vp, bp, size, lblkno, ncl, -1);
cluster_wbuild(vp, size, lblkno, ncl);
splx(s);
return;
}
}
bremfree(bp);
splx(s);
/*
* default (old) behavior, writing out only one block
*/
bremfree(bp);
bp->b_flags |= B_BUSY | B_ASYNC;
(void) VOP_BWRITE(bp);
splx(s);
}
@@ -888,6 +880,7 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
* We are conservative on metadata and don't just extend the buffer
* but write and re-constitute it.
*/
if (bp->b_bcount != size) {
if (bp->b_flags & B_VMIO) {
allocbuf(bp, size);
@@ -897,6 +890,7 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
goto loop;
}
}
/*
* make sure that all pages in the buffer are valid, if they
* aren't, clear the cache flag.
@@ -1492,9 +1486,7 @@ vfs_bio_clrbuf(struct buf *bp) {
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
int j;
if( bp->b_pages[0]->valid != VM_PAGE_BITS_ALL) {
for(j=0; j < bp->b_bufsize / DEV_BSIZE;j++) {
bzero(bp->b_data + j * DEV_BSIZE, DEV_BSIZE);
}
bzero(bp->b_data, bp->b_bufsize);
}
bp->b_resid = 0;
return;
@@ -1503,7 +1495,8 @@ vfs_bio_clrbuf(struct buf *bp) {
if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
continue;
if( bp->b_pages[i]->valid == 0) {
bzero(bp->b_data + i * PAGE_SIZE, PAGE_SIZE);
if ((bp->b_pages[i]->flags & PG_ZERO) == 0)
bzero(bp->b_data + i * PAGE_SIZE, PAGE_SIZE);
} else {
int j;
for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) {
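One detail worth noting in the vfs_bio_clrbuf() hunks above: a page the VM system handed over pre-zeroed (PG_ZERO set) is no longer zeroed a second time. A compilable sketch of the idea, where the PG_ZERO test is the part taken from the diff and everything else (the stub type, the flag value) is an illustrative assumption:

#include <string.h>

#define PG_ZERO		0x0040	/* illustrative: page arrived pre-zeroed */
#define PAGE_SIZE	4096

struct vm_page_stub {
	int	flags;
	char	data[PAGE_SIZE];
};

/* Zero a page only when the VM system has not already done so. */
void
clr_page(struct vm_page_stub *m)
{
	if ((m->flags & PG_ZERO) == 0)
		memset(m->data, 0, PAGE_SIZE);
}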

sys/kern/vfs_cluster.c

@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
* $Id: vfs_cluster.c,v 1.23 1995/10/29 15:31:22 phk Exp $
* $Id: vfs_cluster.c,v 1.24 1995/11/14 09:19:07 phk Exp $
*/
#include <sys/param.h>
@@ -278,7 +278,6 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run)
long size;
int run;
{
struct cluster_save *b_save;
struct buf *bp, *tbp;
daddr_t bn;
int i, inc, j;
@@ -311,11 +310,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run)
bp->b_lblkno = lbn;
pbgetvp(vp, bp);
b_save = malloc(sizeof(struct buf *) * run +
sizeof(struct cluster_save), M_SEGMENT, M_WAITOK);
b_save->bs_nchildren = 0;
b_save->bs_children = (struct buf **) (b_save + 1);
bp->b_saveaddr = b_save;
TAILQ_INIT(&bp->b_cluster.cluster_head);
bp->b_bcount = 0;
bp->b_bufsize = 0;
@@ -360,8 +355,8 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run)
break;
}
}
++b_save->bs_nchildren;
b_save->bs_children[i] = tbp;
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
@@ -398,8 +393,7 @@ void
cluster_callback(bp)
struct buf *bp;
{
struct cluster_save *b_save;
struct buf **bpp, *tbp;
struct buf *nbp, *tbp;
int error = 0;
/*
@@ -408,21 +402,20 @@ cluster_callback(bp)
if (bp->b_flags & B_ERROR)
error = bp->b_error;
b_save = (struct cluster_save *) (bp->b_saveaddr);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
*/
for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
tbp = *bpp;
for (tbp = bp->b_cluster.cluster_head.tqh_first;
tbp; tbp = nbp) {
nbp = tbp->b_cluster.cluster_entry.tqe_next;
if (error) {
tbp->b_flags |= B_ERROR;
tbp->b_error = error;
}
biodone(tbp);
}
free(b_save, M_SEGMENT);
relpbuf(bp);
}
@@ -445,8 +438,10 @@ cluster_write(bp, filesize)
daddr_t lbn;
int maxclen, cursize;
int lblocksize;
int async;
vp = bp->b_vp;
async = (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC));
lblocksize = vp->v_mount->mnt_stat.f_iosize;
lbn = bp->b_lblkno;
@@ -468,11 +463,21 @@ cluster_write(bp, filesize)
* reallocating to make it sequential.
*/
cursize = vp->v_lastw - vp->v_cstart + 1;
#if 1
if ((lbn + 1) * lblocksize != filesize ||
lbn != vp->v_lastw + 1 ||
vp->v_clen <= cursize) {
if (!async)
cluster_wbuild(vp, lblocksize,
vp->v_cstart, cursize);
}
#else
if (!doreallocblks ||
(lbn + 1) * lblocksize != filesize ||
lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
cluster_wbuild(vp, NULL, lblocksize,
vp->v_cstart, cursize, lbn);
if (!async)
cluster_wbuild(vp, lblocksize,
vp->v_cstart, cursize);
} else {
struct buf **bpp, **endbp;
struct cluster_save *buflist;
@@ -488,8 +493,8 @@ cluster_write(bp, filesize)
bpp < endbp; bpp++)
brelse(*bpp);
free(buflist, M_SEGMENT);
cluster_wbuild(vp, NULL, lblocksize,
vp->v_cstart, cursize, lbn);
cluster_wbuild(vp, lblocksize,
vp->v_cstart, cursize);
} else {
/*
* Succeeded, keep building cluster.
@@ -503,6 +508,7 @@ cluster_write(bp, filesize)
return;
}
}
#endif
}
/*
* Consider beginning a cluster. If at end of file, make
@@ -521,7 +527,7 @@ cluster_write(bp, filesize)
return;
}
vp->v_clen = maxclen;
if (maxclen == 0) { /* I/O not contiguous */
if (!async && maxclen == 0) { /* I/O not contiguous */
vp->v_cstart = lbn + 1;
bawrite(bp);
} else { /* Wait for rest of cluster */
@@ -532,8 +538,9 @@ cluster_write(bp, filesize)
/*
* At end of cluster, write it out.
*/
cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
vp->v_clen + 1, lbn);
bdwrite(bp);
cluster_wbuild(vp, lblocksize, vp->v_cstart,
vp->v_clen + 1);
vp->v_clen = 0;
vp->v_cstart = lbn + 1;
} else
@@ -553,61 +560,29 @@ cluster_write(bp, filesize)
* the current block (if last_bp == NULL).
*/
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
cluster_wbuild(vp, size, start_lbn, len)
struct vnode *vp;
struct buf *last_bp;
long size;
daddr_t start_lbn;
int len;
daddr_t lbn;
{
struct cluster_save *b_save;
struct buf *bp, *tbp, *pb;
struct buf *bp, *tbp;
int i, j, s;
int dbsize = btodb(size);
int origlen = len;
#ifdef DIAGNOSTIC
if (size != vp->v_mount->mnt_stat.f_iosize)
panic("cluster_wbuild: size %d != filesize %d\n",
size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
if( (lbn != -1) || (last_bp == 0)) {
while ((!(tbp = incore(vp, start_lbn)) || (tbp->b_flags & B_BUSY)
|| (start_lbn == lbn)) && len) {
++start_lbn;
--len;
}
pb = trypbuf();
/* Get more memory for current buffer */
if (len <= 1 || pb == NULL) {
if (pb != NULL)
relpbuf(pb);
if (last_bp) {
bawrite(last_bp);
} else if (len) {
bp = getblk(vp, start_lbn, size, 0, 0);
bawrite(bp);
}
return;
}
tbp = getblk(vp, start_lbn, size, 0, 0);
} else {
tbp = last_bp;
if( tbp->b_flags & B_BUSY) {
printf("vfs_cluster: warning: buffer already busy\n");
}
tbp->b_flags |= B_BUSY;
last_bp = 0;
pb = trypbuf();
if (pb == NULL) {
bawrite(tbp);
return;
}
if (len == 0)
return;
if ( ((tbp = incore(vp, start_lbn)) == NULL) ||
((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) {
++start_lbn;
--len;
goto redo;
}
if (!(tbp->b_flags & B_DELWRI)) {
relpbuf(pb);
tbp = getblk(vp, start_lbn, size, 0, 0);
if ((tbp->b_flags & B_DELWRI) == 0) {
++start_lbn;
--len;
brelse(tbp);
@@ -620,27 +595,28 @@ cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
* potentially pull it back up if the cluster was terminated
* prematurely--too much hassle.
*/
if (((tbp->b_flags & B_VMIO) == 0) ||
(tbp->b_bcount != tbp->b_bufsize)) {
relpbuf(pb);
if (((tbp->b_flags & (B_VMIO|B_CLUSTEROK)) != (B_VMIO|B_CLUSTEROK)) ||
(tbp->b_bcount != tbp->b_bufsize) ||
len == 1) {
bawrite(tbp);
++start_lbn;
--len;
bawrite(tbp);
goto redo;
}
bp = pb;
b_save = malloc(sizeof(struct buf *) * (len + 1) + sizeof(struct cluster_save),
M_SEGMENT, M_WAITOK);
b_save->bs_nchildren = 0;
b_save->bs_children = (struct buf **) (b_save + 1);
bp->b_saveaddr = b_save;
bp = trypbuf();
if (bp == NULL) {
bawrite(tbp);
++start_lbn;
--len;
goto redo;
}
TAILQ_INIT(&bp->b_cluster.cluster_head);
bp->b_bcount = 0;
bp->b_bufsize = 0;
bp->b_npages = 0;
if (tbp->b_flags & B_VMIO)
bp->b_flags |= B_VMIO;
bp->b_blkno = tbp->b_blkno;
bp->b_lblkno = tbp->b_lblkno;
(vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK;
@@ -650,41 +626,27 @@ cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
for (i = 0; i < len; ++i, ++start_lbn) {
if (i != 0) {
/*
* Block is not in core or the non-sequential block
* ending our cluster was part of the cluster (in
* which case we don't want to write it twice).
*/
if (!(tbp = incore(vp, start_lbn)) ||
(last_bp == NULL && start_lbn == lbn))
s = splhigh();
if ((tbp = incore(vp, start_lbn)) == NULL) {
splx(s);
break;
}
if ((tbp->b_flags & (B_INVAL | B_CLUSTEROK)) != B_CLUSTEROK)
if ((tbp->b_flags & (B_CLUSTEROK|B_INVAL|B_BUSY|B_DELWRI)) != (B_DELWRI|B_CLUSTEROK)) {
splx(s);
break;
}
if ((tbp->b_npages + bp->b_npages) > (MAXPHYS / PAGE_SIZE))
if ((tbp->b_bcount != size) ||
((bp->b_blkno + dbsize * i) != tbp->b_blkno) ||
((tbp->b_npages + bp->b_npages) > (MAXPHYS / PAGE_SIZE))) {
splx(s);
break;
if ( (tbp->b_blkno != tbp->b_lblkno) &&
((bp->b_blkno + btodb(size) * i) != tbp->b_blkno))
break;
/*
* Get the desired block buffer (unless it is the
* final sequential block whose buffer was passed in
* explicitly as last_bp).
*/
if (last_bp == NULL || start_lbn != lbn) {
if( tbp->b_flags & B_BUSY)
break;
tbp = getblk(vp, start_lbn, size, 0, 0);
if (!(tbp->b_flags & B_DELWRI) ||
((tbp->b_flags & B_VMIO) != (bp->b_flags & B_VMIO))) {
brelse(tbp);
break;
}
} else
tbp = last_bp;
}
bremfree(tbp);
tbp->b_flags |= B_BUSY;
tbp->b_flags &= ~B_DONE;
splx(s);
}
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
@@ -706,19 +668,18 @@ cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
reassignbuf(tbp, tbp->b_vp); /* put on clean list */
++tbp->b_vp->v_numoutput;
splx(s);
b_save->bs_children[i] = tbp;
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
}
b_save->bs_nchildren = i;
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
bawrite(bp);
if (i < len) {
len -= i;
goto redo;
}
len -= i;
goto redo;
}
#if 0
/*
* Collect together all the buffers in a cluster.
* Plus add one additional buffer.
@@ -744,3 +705,4 @@ cluster_collectbufs(vp, last_bp)
buflist->bs_nchildren = i + 1;
return (buflist);
}
#endif
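The rewritten cluster_wbuild() above gathers a run of buffers that are all delayed writes, marked clusterable, neither busy nor invalid, and physically contiguous; anything else terminates the run. A toy model of that gathering test, with an array standing in for the incore() lookups and illustrative flag values (the flag combination and the contiguity check mirror the diff):

#include <stddef.h>

#define B_BUSY		0x00000010	/* illustrative values */
#define B_DELWRI	0x00000080
#define B_INVAL		0x00002000
#define B_CLUSTEROK	0x00020000

struct tbuf {
	long	blkno;			/* physical block number */
	int	flags;
};

/*
 * Count how many buffers starting at bufs[0] can be combined into a
 * single cluster write.
 */
size_t
cluster_run(struct tbuf *bufs, size_t len, long dbsize)
{
	size_t i;

	for (i = 0; i < len; i++) {
		/* Must be a delayed write and clusterable, nothing else. */
		if ((bufs[i].flags & (B_CLUSTEROK | B_INVAL | B_BUSY |
		    B_DELWRI)) != (B_DELWRI | B_CLUSTEROK))
			break;
		/* Must sit dbsize disk blocks after its predecessor. */
		if (bufs[i].blkno != bufs[0].blkno + dbsize * (long)i)
			break;
	}
	return (i);
}

When the run is too short to be worth combining (or no anonymous buffer is available), the code falls back to a plain bawrite() of the single block, as the diff shows.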

sys/sys/buf.h

@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.7 (Berkeley) 1/21/94
* $Id: buf.h,v 1.20 1995/07/29 11:42:43 bde Exp $
* $Id: buf.h,v 1.21 1995/08/24 12:57:17 davidg Exp $
*/
#ifndef _SYS_BUF_H_
@@ -84,7 +84,6 @@ struct buf {
/* For nested b_iodone's. */
struct iodone_chain *b_iodone_chain;
struct vnode *b_vp; /* Device vnode. */
int b_pfcent; /* Center page when swapping cluster. */
int b_dirtyoff; /* Offset in buffer of dirty region. */
int b_dirtyend; /* Offset of end of dirty region. */
struct ucred *b_rcred; /* Read credentials reference. */
@@ -96,6 +95,10 @@ struct buf {
void *b_driver1; /* for private use by the driver */
void *b_driver2; /* for private use by the driver */
void *b_spc;
union cluster_info {
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
} b_cluster;
struct vm_page *b_pages[(MAXPHYS + PAGE_SIZE - 1)/PAGE_SIZE];
int b_npages;
};
@@ -142,20 +145,6 @@ struct buf {
#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
#define B_BOUNCE 0x80000000 /* bounce buffer flag */
/*
* This structure describes a clustered I/O. It is stored in the b_saveaddr
* field of the buffer on which I/O is done. At I/O completion, cluster
* callback uses the structure to parcel I/O's to individual buffers, and
* then free's this structure.
*/
struct cluster_save {
long bs_bcount; /* Saved b_bcount. */
long bs_bufsize; /* Saved b_bufsize. */
void *bs_saveaddr; /* Saved b_addr. */
int bs_nchildren; /* Number of associated buffers. */
struct buf **bs_children; /* List of associated buffers. */
};
/*
* number of buffer hash entries
*/
@@ -227,8 +216,7 @@ void biodone __P((struct buf *));
void cluster_callback __P((struct buf *));
int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, struct buf **));
void cluster_wbuild __P((struct vnode *, struct buf *, long, daddr_t, int,
daddr_t));
void cluster_wbuild __P((struct vnode *, long, daddr_t, int));
void cluster_write __P((struct buf *, u_quad_t));
int physio __P((void (*)(), struct buf *, dev_t, int, u_int (*)(),
struct uio *));
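For contrast, the struct cluster_save deleted above was filled by the pattern below: one malloc carrying the header plus a trailing pointer array, with bs_children aimed just past the header. This is a userland reconstruction of the removed kernel code; plain malloc stands in for the kernel's malloc(..., M_SEGMENT, M_WAITOK):

#include <stdlib.h>

struct buf;				/* opaque here */

struct cluster_save {
	long	bs_bcount;		/* saved b_bcount */
	long	bs_bufsize;		/* saved b_bufsize */
	void	*bs_saveaddr;		/* saved b_addr */
	int	bs_nchildren;		/* number of associated buffers */
	struct buf **bs_children;	/* list of associated buffers */
};

/* Header and child array in one allocation, as the old code did. */
struct cluster_save *
cluster_save_alloc(int run)
{
	struct cluster_save *b_save;

	b_save = malloc(sizeof(struct buf *) * run +
	    sizeof(struct cluster_save));
	if (b_save != NULL) {
		b_save->bs_nchildren = 0;
		b_save->bs_children = (struct buf **)(b_save + 1);
	}
	return (b_save);
}

Every cluster write used to pay for this allocation (and the matching free in cluster_callback()); the embedded TAILQ eliminates both.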
