Enhance reassignbuf(). When a buffer cannot be time-optimally inserted
into vnode dirtyblkhd we append it to the list instead of prepending it to the list in order to maintain a 'forward' locality of reference, which is arguably better than 'reverse'. The original algorithm did things this way too, but at a huge time cost. Enhance the append interlock for NFS writes to handle intr/soft mounts better. Fix the hysteresis for NFS async daemon I/O requests to reduce the number of unnecessary context switches. Modify handling of NFS mount options. Any given user option that is too high now defaults to the kernel maximum for that option rather than the kernel default for that option. Reviewed by: Alfred Perlstein <bright@wintelcom.net>
This commit is contained in:
parent
32900e82f3
commit
c37c9620cd
@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
|
||||
tbp = TAILQ_FIRST(listheadp);
|
||||
if (tbp == NULL ||
|
||||
bp->b_lblkno == 0 ||
|
||||
(bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
|
||||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
|
||||
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
|
||||
++reassignbufsortgood;
|
||||
@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
|
||||
} else if (reassignbufmethod == 1) {
|
||||
/*
|
||||
* New sorting algorithm, only handle sequential case,
|
||||
* otherwise guess.
|
||||
* otherwise append to end (but before metadata)
|
||||
*/
|
||||
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
|
||||
(tbp->b_xflags & BX_VNDIRTY)) {
|
||||
/*
|
||||
* Found the best place to insert the buffer
|
||||
*/
|
||||
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
|
||||
++reassignbufsortgood;
|
||||
} else {
|
||||
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
|
||||
/*
|
||||
* Missed, append to end, but before meta-data.
|
||||
* We know that the head buffer in the list is
|
||||
* not meta-data due to prior conditionals.
|
||||
*
|
||||
* Indirect effects: NFS second stage write
|
||||
* tends to wind up here, giving maximum
|
||||
* distance between the unstable write and the
|
||||
* commit rpc.
|
||||
*/
|
||||
tbp = TAILQ_LAST(listheadp, buflists);
|
||||
while (tbp && tbp->b_lblkno < 0)
|
||||
tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
|
||||
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
|
||||
++reassignbufsortbad;
|
||||
}
|
||||
} else {
|
||||
|
@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
|
||||
tbp = TAILQ_FIRST(listheadp);
|
||||
if (tbp == NULL ||
|
||||
bp->b_lblkno == 0 ||
|
||||
(bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
|
||||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
|
||||
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
|
||||
++reassignbufsortgood;
|
||||
@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
|
||||
} else if (reassignbufmethod == 1) {
|
||||
/*
|
||||
* New sorting algorithm, only handle sequential case,
|
||||
* otherwise guess.
|
||||
* otherwise append to end (but before metadata)
|
||||
*/
|
||||
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
|
||||
(tbp->b_xflags & BX_VNDIRTY)) {
|
||||
/*
|
||||
* Found the best place to insert the buffer
|
||||
*/
|
||||
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
|
||||
++reassignbufsortgood;
|
||||
} else {
|
||||
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
|
||||
/*
|
||||
* Missed, append to end, but before meta-data.
|
||||
* We know that the head buffer in the list is
|
||||
* not meta-data due to prior conditionals.
|
||||
*
|
||||
* Indirect effects: NFS second stage write
|
||||
* tends to wind up here, giving maximum
|
||||
* distance between the unstable write and the
|
||||
* commit rpc.
|
||||
*/
|
||||
tbp = TAILQ_LAST(listheadp, buflists);
|
||||
while (tbp && tbp->b_lblkno < 0)
|
||||
tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
|
||||
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
|
||||
++reassignbufsortbad;
|
||||
}
|
||||
} else {
|
||||
|
@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
|
||||
rabp->b_flags |= B_INVAL|B_ERROR;
|
||||
vfs_unbusy_pages(rabp);
|
||||
brelse(rabp);
|
||||
break;
|
||||
}
|
||||
} else
|
||||
} else {
|
||||
brelse(rabp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -497,8 +499,19 @@ again:
|
||||
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
|
||||
bcount = np->n_size - (off_t)lbn * biosize;
|
||||
}
|
||||
if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
|
||||
goto again;
|
||||
if (bcount != biosize) {
|
||||
switch(nfs_rslock(np, p)) {
|
||||
case ENOLCK:
|
||||
goto again;
|
||||
/* not reached */
|
||||
case EINTR:
|
||||
case ERESTART:
|
||||
return(EINTR);
|
||||
/* not reached */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bp = nfs_getcacheblk(vp, lbn, bcount, p);
|
||||
|
||||
@ -785,8 +798,17 @@ restart:
|
||||
*/
|
||||
if ((ioflag & IO_APPEND) ||
|
||||
uio->uio_offset + uio->uio_resid > np->n_size) {
|
||||
if (nfs_rslock(np, p) == ENOLCK)
|
||||
switch(nfs_rslock(np, p)) {
|
||||
case ENOLCK:
|
||||
goto restart;
|
||||
/* not reached */
|
||||
case EINTR:
|
||||
case ERESTART:
|
||||
return(EINTR);
|
||||
/* not reached */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
haverslock = 1;
|
||||
}
|
||||
|
||||
@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
|
||||
int slptimeo = 0;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* If no async daemons then return EIO to force caller to run the rpc
|
||||
* synchronously.
|
||||
*/
|
||||
if (nfs_numasync == 0)
|
||||
return (EIO);
|
||||
|
||||
nmp = VFSTONFS(bp->b_vp->v_mount);
|
||||
|
||||
/*
|
||||
* Commits are usually short and sweet so lets save some cpu and
|
||||
* leave the async daemons for more important rpc's (such as reads
|
||||
* and writes).
|
||||
*/
|
||||
if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
|
||||
(nmp->nm_bufqiods > nfs_numasync / 2)) {
|
||||
return(EIO);
|
||||
}
|
||||
|
||||
again:
|
||||
if (nmp->nm_flag & NFSMNT_INT)
|
||||
slpflag = PCATCH;
|
||||
@ -1244,7 +1281,8 @@ again:
|
||||
*/
|
||||
if (gotiod) {
|
||||
/*
|
||||
* Ensure that the queue never grows too large.
|
||||
* Ensure that the queue never grows too large. We still want
|
||||
* to asynchronize so we block rather then return EIO.
|
||||
*/
|
||||
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
|
||||
NFS_DPF(ASYNCIO,
|
||||
|
@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
|
||||
register struct vnode *vp;
|
||||
struct vnode *nvp;
|
||||
int error;
|
||||
int rsflags;
|
||||
struct nfsmount *nmp;
|
||||
|
||||
/*
|
||||
* Calculate nfs mount point and figure out whether the rslock should
|
||||
* be interruptable or not.
|
||||
*/
|
||||
nmp = VFSTONFS(mntp);
|
||||
if (nmp->nm_flag & NFSMNT_INT)
|
||||
rsflags = PCATCH;
|
||||
else
|
||||
rsflags = 0;
|
||||
|
||||
retry:
|
||||
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
|
||||
@ -180,7 +192,7 @@ loop:
|
||||
np->n_fhp = &np->n_fh;
|
||||
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
|
||||
np->n_fhsize = fhsize;
|
||||
lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
|
||||
lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
|
||||
*npp = np;
|
||||
|
||||
if (nfs_node_hash_lock < 0)
|
||||
|
@ -969,7 +969,7 @@ nfssvc_iod(p)
|
||||
/* Take one off the front of the list */
|
||||
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
|
||||
nmp->nm_bufqlen--;
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
|
||||
nmp->nm_bufqwant = FALSE;
|
||||
wakeup(&nmp->nm_bufq);
|
||||
}
|
||||
|
@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
|
||||
if (nmp->nm_acregmin > nmp->nm_acregmax)
|
||||
nmp->nm_acregmin = nmp->nm_acregmax;
|
||||
|
||||
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
|
||||
argp->maxgrouplist <= NFS_MAXGRPS)
|
||||
nmp->nm_numgrps = argp->maxgrouplist;
|
||||
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
|
||||
argp->readahead <= NFS_MAXRAHEAD)
|
||||
nmp->nm_readahead = argp->readahead;
|
||||
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
|
||||
argp->leaseterm <= NQ_MAXLEASE)
|
||||
nmp->nm_leaseterm = argp->leaseterm;
|
||||
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
|
||||
argp->deadthresh <= NQ_NEVERDEAD)
|
||||
nmp->nm_deadthresh = argp->deadthresh;
|
||||
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
|
||||
if (argp->maxgrouplist <= NFS_MAXGRPS)
|
||||
nmp->nm_numgrps = argp->maxgrouplist;
|
||||
else
|
||||
nmp->nm_numgrps = NFS_MAXGRPS;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
|
||||
if (argp->readahead <= NFS_MAXRAHEAD)
|
||||
nmp->nm_readahead = argp->readahead;
|
||||
else
|
||||
nmp->nm_readahead = NFS_MAXRAHEAD;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
|
||||
if (argp->leaseterm <= NQ_MAXLEASE)
|
||||
nmp->nm_leaseterm = argp->leaseterm;
|
||||
else
|
||||
nmp->nm_leaseterm = NQ_MAXLEASE;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
|
||||
if (argp->deadthresh <= NQ_NEVERDEAD)
|
||||
nmp->nm_deadthresh = argp->deadthresh;
|
||||
else
|
||||
nmp->nm_deadthresh = NQ_NEVERDEAD;
|
||||
}
|
||||
|
||||
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
|
||||
(nmp->nm_soproto != argp->proto));
|
||||
|
@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
|
||||
rabp->b_flags |= B_INVAL|B_ERROR;
|
||||
vfs_unbusy_pages(rabp);
|
||||
brelse(rabp);
|
||||
break;
|
||||
}
|
||||
} else
|
||||
} else {
|
||||
brelse(rabp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -497,8 +499,19 @@ again:
|
||||
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
|
||||
bcount = np->n_size - (off_t)lbn * biosize;
|
||||
}
|
||||
if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
|
||||
goto again;
|
||||
if (bcount != biosize) {
|
||||
switch(nfs_rslock(np, p)) {
|
||||
case ENOLCK:
|
||||
goto again;
|
||||
/* not reached */
|
||||
case EINTR:
|
||||
case ERESTART:
|
||||
return(EINTR);
|
||||
/* not reached */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bp = nfs_getcacheblk(vp, lbn, bcount, p);
|
||||
|
||||
@ -785,8 +798,17 @@ restart:
|
||||
*/
|
||||
if ((ioflag & IO_APPEND) ||
|
||||
uio->uio_offset + uio->uio_resid > np->n_size) {
|
||||
if (nfs_rslock(np, p) == ENOLCK)
|
||||
switch(nfs_rslock(np, p)) {
|
||||
case ENOLCK:
|
||||
goto restart;
|
||||
/* not reached */
|
||||
case EINTR:
|
||||
case ERESTART:
|
||||
return(EINTR);
|
||||
/* not reached */
|
||||
default:
|
||||
break;
|
||||
}
|
||||
haverslock = 1;
|
||||
}
|
||||
|
||||
@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
|
||||
int slptimeo = 0;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* If no async daemons then return EIO to force caller to run the rpc
|
||||
* synchronously.
|
||||
*/
|
||||
if (nfs_numasync == 0)
|
||||
return (EIO);
|
||||
|
||||
nmp = VFSTONFS(bp->b_vp->v_mount);
|
||||
|
||||
/*
|
||||
* Commits are usually short and sweet so lets save some cpu and
|
||||
* leave the async daemons for more important rpc's (such as reads
|
||||
* and writes).
|
||||
*/
|
||||
if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
|
||||
(nmp->nm_bufqiods > nfs_numasync / 2)) {
|
||||
return(EIO);
|
||||
}
|
||||
|
||||
again:
|
||||
if (nmp->nm_flag & NFSMNT_INT)
|
||||
slpflag = PCATCH;
|
||||
@ -1244,7 +1281,8 @@ again:
|
||||
*/
|
||||
if (gotiod) {
|
||||
/*
|
||||
* Ensure that the queue never grows too large.
|
||||
* Ensure that the queue never grows too large. We still want
|
||||
* to asynchronize so we block rather then return EIO.
|
||||
*/
|
||||
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
|
||||
NFS_DPF(ASYNCIO,
|
||||
|
@ -969,7 +969,7 @@ nfssvc_iod(p)
|
||||
/* Take one off the front of the list */
|
||||
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
|
||||
nmp->nm_bufqlen--;
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
|
||||
nmp->nm_bufqwant = FALSE;
|
||||
wakeup(&nmp->nm_bufq);
|
||||
}
|
||||
|
@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
|
||||
register struct vnode *vp;
|
||||
struct vnode *nvp;
|
||||
int error;
|
||||
int rsflags;
|
||||
struct nfsmount *nmp;
|
||||
|
||||
/*
|
||||
* Calculate nfs mount point and figure out whether the rslock should
|
||||
* be interruptable or not.
|
||||
*/
|
||||
nmp = VFSTONFS(mntp);
|
||||
if (nmp->nm_flag & NFSMNT_INT)
|
||||
rsflags = PCATCH;
|
||||
else
|
||||
rsflags = 0;
|
||||
|
||||
retry:
|
||||
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
|
||||
@ -180,7 +192,7 @@ loop:
|
||||
np->n_fhp = &np->n_fh;
|
||||
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
|
||||
np->n_fhsize = fhsize;
|
||||
lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
|
||||
lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
|
||||
*npp = np;
|
||||
|
||||
if (nfs_node_hash_lock < 0)
|
||||
|
@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
|
||||
if (nmp->nm_acregmin > nmp->nm_acregmax)
|
||||
nmp->nm_acregmin = nmp->nm_acregmax;
|
||||
|
||||
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
|
||||
argp->maxgrouplist <= NFS_MAXGRPS)
|
||||
nmp->nm_numgrps = argp->maxgrouplist;
|
||||
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
|
||||
argp->readahead <= NFS_MAXRAHEAD)
|
||||
nmp->nm_readahead = argp->readahead;
|
||||
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
|
||||
argp->leaseterm <= NQ_MAXLEASE)
|
||||
nmp->nm_leaseterm = argp->leaseterm;
|
||||
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
|
||||
argp->deadthresh <= NQ_NEVERDEAD)
|
||||
nmp->nm_deadthresh = argp->deadthresh;
|
||||
if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
|
||||
if (argp->maxgrouplist <= NFS_MAXGRPS)
|
||||
nmp->nm_numgrps = argp->maxgrouplist;
|
||||
else
|
||||
nmp->nm_numgrps = NFS_MAXGRPS;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
|
||||
if (argp->readahead <= NFS_MAXRAHEAD)
|
||||
nmp->nm_readahead = argp->readahead;
|
||||
else
|
||||
nmp->nm_readahead = NFS_MAXRAHEAD;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
|
||||
if (argp->leaseterm <= NQ_MAXLEASE)
|
||||
nmp->nm_leaseterm = argp->leaseterm;
|
||||
else
|
||||
nmp->nm_leaseterm = NQ_MAXLEASE;
|
||||
}
|
||||
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
|
||||
if (argp->deadthresh <= NQ_NEVERDEAD)
|
||||
nmp->nm_deadthresh = argp->deadthresh;
|
||||
else
|
||||
nmp->nm_deadthresh = NQ_NEVERDEAD;
|
||||
}
|
||||
|
||||
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
|
||||
(nmp->nm_soproto != argp->proto));
|
||||
|
@ -969,7 +969,7 @@ nfssvc_iod(p)
|
||||
/* Take one off the front of the list */
|
||||
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
|
||||
nmp->nm_bufqlen--;
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
|
||||
if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
|
||||
nmp->nm_bufqwant = FALSE;
|
||||
wakeup(&nmp->nm_bufq);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user