Enhance reassignbuf(). When a buffer cannot be time-optimally inserted

into vnode dirtyblkhd we append it to the list instead of prepend it to
    the list in order to maintain a 'forward' locality of reference, which
    is arguably better then 'reverse'.  The original algorithm did things this
    way to but at a huge time cost.

    Enhance the append interlock for NFS writes to handle intr/soft mounts
    better.

    Fix the hysteresis for NFS async daemon I/O requests to reduce the
    number of unnecessary context switches.

    Modify handling of NFS mount options.  Any given user option that is
    too high now defaults to the kernel maximum for that option rather then
    the kernel default for that option.

Reviewed by:	 Alfred Perlstein <bright@wintelcom.net>
This commit is contained in:
Matthew Dillon 2000-01-05 05:11:37 +00:00
parent 32900e82f3
commit c37c9620cd
11 changed files with 201 additions and 43 deletions

View File

@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
tbp = TAILQ_FIRST(listheadp);
if (tbp == NULL ||
bp->b_lblkno == 0 ||
(bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
++reassignbufsortgood;
@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
} else if (reassignbufmethod == 1) {
/*
* New sorting algorithm, only handle sequential case,
* otherwise guess.
* otherwise append to end (but before metadata)
*/
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
(tbp->b_xflags & BX_VNDIRTY)) {
/*
* Found the best place to insert the buffer
*/
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortgood;
} else {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
/*
* Missed, append to end, but before meta-data.
* We know that the head buffer in the list is
* not meta-data due to prior conditionals.
*
* Indirect effects: NFS second stage write
* tends to wind up here, giving maximum
* distance between the unstable write and the
* commit rpc.
*/
tbp = TAILQ_LAST(listheadp, buflists);
while (tbp && tbp->b_lblkno < 0)
tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortbad;
}
} else {

View File

@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
tbp = TAILQ_FIRST(listheadp);
if (tbp == NULL ||
bp->b_lblkno == 0 ||
(bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
++reassignbufsortgood;
@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
} else if (reassignbufmethod == 1) {
/*
* New sorting algorithm, only handle sequential case,
* otherwise guess.
* otherwise append to end (but before metadata)
*/
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
(tbp->b_xflags & BX_VNDIRTY)) {
/*
* Found the best place to insert the buffer
*/
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortgood;
} else {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
/*
* Missed, append to end, but before meta-data.
* We know that the head buffer in the list is
* not meta-data due to prior conditionals.
*
* Indirect effects: NFS second stage write
* tends to wind up here, giving maximum
* distance between the unstable write and the
* commit rpc.
*/
tbp = TAILQ_LAST(listheadp, buflists);
while (tbp && tbp->b_lblkno < 0)
tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortbad;
}
} else {

View File

@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
rabp->b_flags |= B_INVAL|B_ERROR;
vfs_unbusy_pages(rabp);
brelse(rabp);
break;
}
} else
} else {
brelse(rabp);
}
}
}
}
@ -497,8 +499,19 @@ again:
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
bcount = np->n_size - (off_t)lbn * biosize;
}
if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
goto again;
if (bcount != biosize) {
switch(nfs_rslock(np, p)) {
case ENOLCK:
goto again;
/* not reached */
case EINTR:
case ERESTART:
return(EINTR);
/* not reached */
default:
break;
}
}
bp = nfs_getcacheblk(vp, lbn, bcount, p);
@ -785,8 +798,17 @@ restart:
*/
if ((ioflag & IO_APPEND) ||
uio->uio_offset + uio->uio_resid > np->n_size) {
if (nfs_rslock(np, p) == ENOLCK)
switch(nfs_rslock(np, p)) {
case ENOLCK:
goto restart;
/* not reached */
case EINTR:
case ERESTART:
return(EINTR);
/* not reached */
default:
break;
}
haverslock = 1;
}
@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
int slptimeo = 0;
int error;
/*
* If no async daemons then return EIO to force caller to run the rpc
* synchronously.
*/
if (nfs_numasync == 0)
return (EIO);
nmp = VFSTONFS(bp->b_vp->v_mount);
/*
* Commits are usually short and sweet so lets save some cpu and
* leave the async daemons for more important rpc's (such as reads
* and writes).
*/
if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
(nmp->nm_bufqiods > nfs_numasync / 2)) {
return(EIO);
}
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@ -1244,7 +1281,8 @@ again:
*/
if (gotiod) {
/*
* Ensure that the queue never grows too large.
* Ensure that the queue never grows too large. We still want
* to asynchronize so we block rather then return EIO.
*/
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
NFS_DPF(ASYNCIO,

View File

@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
register struct vnode *vp;
struct vnode *nvp;
int error;
int rsflags;
struct nfsmount *nmp;
/*
* Calculate nfs mount point and figure out whether the rslock should
* be interruptable or not.
*/
nmp = VFSTONFS(mntp);
if (nmp->nm_flag & NFSMNT_INT)
rsflags = PCATCH;
else
rsflags = 0;
retry:
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@ -180,7 +192,7 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
*npp = np;
if (nfs_node_hash_lock < 0)

View File

@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}

View File

@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
if (nmp->nm_acregmin > nmp->nm_acregmax)
nmp->nm_acregmin = nmp->nm_acregmax;
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
argp->maxgrouplist <= NFS_MAXGRPS)
nmp->nm_numgrps = argp->maxgrouplist;
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
argp->readahead <= NFS_MAXRAHEAD)
nmp->nm_readahead = argp->readahead;
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
argp->leaseterm <= NQ_MAXLEASE)
nmp->nm_leaseterm = argp->leaseterm;
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
argp->deadthresh <= NQ_NEVERDEAD)
nmp->nm_deadthresh = argp->deadthresh;
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
if (argp->maxgrouplist <= NFS_MAXGRPS)
nmp->nm_numgrps = argp->maxgrouplist;
else
nmp->nm_numgrps = NFS_MAXGRPS;
}
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
if (argp->readahead <= NFS_MAXRAHEAD)
nmp->nm_readahead = argp->readahead;
else
nmp->nm_readahead = NFS_MAXRAHEAD;
}
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
if (argp->leaseterm <= NQ_MAXLEASE)
nmp->nm_leaseterm = argp->leaseterm;
else
nmp->nm_leaseterm = NQ_MAXLEASE;
}
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
if (argp->deadthresh <= NQ_NEVERDEAD)
nmp->nm_deadthresh = argp->deadthresh;
else
nmp->nm_deadthresh = NQ_NEVERDEAD;
}
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
(nmp->nm_soproto != argp->proto));

View File

@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
rabp->b_flags |= B_INVAL|B_ERROR;
vfs_unbusy_pages(rabp);
brelse(rabp);
break;
}
} else
} else {
brelse(rabp);
}
}
}
}
@ -497,8 +499,19 @@ again:
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
bcount = np->n_size - (off_t)lbn * biosize;
}
if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
goto again;
if (bcount != biosize) {
switch(nfs_rslock(np, p)) {
case ENOLCK:
goto again;
/* not reached */
case EINTR:
case ERESTART:
return(EINTR);
/* not reached */
default:
break;
}
}
bp = nfs_getcacheblk(vp, lbn, bcount, p);
@ -785,8 +798,17 @@ restart:
*/
if ((ioflag & IO_APPEND) ||
uio->uio_offset + uio->uio_resid > np->n_size) {
if (nfs_rslock(np, p) == ENOLCK)
switch(nfs_rslock(np, p)) {
case ENOLCK:
goto restart;
/* not reached */
case EINTR:
case ERESTART:
return(EINTR);
/* not reached */
default:
break;
}
haverslock = 1;
}
@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
int slptimeo = 0;
int error;
/*
* If no async daemons then return EIO to force caller to run the rpc
* synchronously.
*/
if (nfs_numasync == 0)
return (EIO);
nmp = VFSTONFS(bp->b_vp->v_mount);
/*
* Commits are usually short and sweet so lets save some cpu and
* leave the async daemons for more important rpc's (such as reads
* and writes).
*/
if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
(nmp->nm_bufqiods > nfs_numasync / 2)) {
return(EIO);
}
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@ -1244,7 +1281,8 @@ again:
*/
if (gotiod) {
/*
* Ensure that the queue never grows too large.
* Ensure that the queue never grows too large. We still want
* to asynchronize so we block rather then return EIO.
*/
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
NFS_DPF(ASYNCIO,

View File

@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}

View File

@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
register struct vnode *vp;
struct vnode *nvp;
int error;
int rsflags;
struct nfsmount *nmp;
/*
* Calculate nfs mount point and figure out whether the rslock should
* be interruptable or not.
*/
nmp = VFSTONFS(mntp);
if (nmp->nm_flag & NFSMNT_INT)
rsflags = PCATCH;
else
rsflags = 0;
retry:
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@ -180,7 +192,7 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
*npp = np;
if (nfs_node_hash_lock < 0)

View File

@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
if (nmp->nm_acregmin > nmp->nm_acregmax)
nmp->nm_acregmin = nmp->nm_acregmax;
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
argp->maxgrouplist <= NFS_MAXGRPS)
nmp->nm_numgrps = argp->maxgrouplist;
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
argp->readahead <= NFS_MAXRAHEAD)
nmp->nm_readahead = argp->readahead;
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
argp->leaseterm <= NQ_MAXLEASE)
nmp->nm_leaseterm = argp->leaseterm;
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
argp->deadthresh <= NQ_NEVERDEAD)
nmp->nm_deadthresh = argp->deadthresh;
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
if (argp->maxgrouplist <= NFS_MAXGRPS)
nmp->nm_numgrps = argp->maxgrouplist;
else
nmp->nm_numgrps = NFS_MAXGRPS;
}
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
if (argp->readahead <= NFS_MAXRAHEAD)
nmp->nm_readahead = argp->readahead;
else
nmp->nm_readahead = NFS_MAXRAHEAD;
}
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
if (argp->leaseterm <= NQ_MAXLEASE)
nmp->nm_leaseterm = argp->leaseterm;
else
nmp->nm_leaseterm = NQ_MAXLEASE;
}
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
if (argp->deadthresh <= NQ_NEVERDEAD)
nmp->nm_deadthresh = argp->deadthresh;
else
nmp->nm_deadthresh = NQ_NEVERDEAD;
}
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
(nmp->nm_soproto != argp->proto));

View File

@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}