diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 76bd5843c369..059ca2a4787a 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
 		tbp = TAILQ_FIRST(listheadp);
 		if (tbp == NULL ||
 		    bp->b_lblkno == 0 ||
+		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
 		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
 			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
@@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
 		} else if (reassignbufmethod == 1) {
 			/*
 			 * New sorting algorithm, only handle sequential case,
-			 * otherwise guess.
+			 * otherwise append to end (but before metadata)
 			 */
 			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
 			    (tbp->b_xflags & BX_VNDIRTY)) {
+				/*
+				 * Found the best place to insert the buffer
+				 */
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortgood;
 			} else {
-				TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
+				/*
+				 * Missed, append to end, but before meta-data.
+				 * We know that the head buffer in the list is
+				 * not meta-data due to prior conditionals.
+				 *
+				 * Indirect effects: NFS second stage write
+				 * tends to wind up here, giving maximum
+				 * distance between the unstable write and the
+				 * commit rpc.
+				 */
+				tbp = TAILQ_LAST(listheadp, buflists);
+				while (tbp && tbp->b_lblkno < 0)
+					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
+				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortbad;
 			}
 		} else {
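
The tail-insert path added to reassignbuf() above relies on an invariant of the vnode dirty-buffer list: file data has a non-negative b_lblkno, indirect-block metadata has a negative one, and metadata accumulates at the tail. Here is a minimal userland sketch of that insertion policy; struct fakebuf, insert_before_metadata() and the sample block numbers are invented for illustration, and only the TAILQ usage mirrors the patch:

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct fakebuf {
	TAILQ_ENTRY(fakebuf) b_vnbufs;
	long b_lblkno;			/* < 0 means metadata */
};
TAILQ_HEAD(buflists, fakebuf);

static void
insert_before_metadata(struct buflists *listheadp, struct fakebuf *bp)
{
	struct fakebuf *tbp;

	/* Walk backward past metadata so data stays ahead of it. */
	tbp = TAILQ_LAST(listheadp, buflists);
	while (tbp && tbp->b_lblkno < 0)
		tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
	/*
	 * Unlike the kernel code, handle an all-metadata list here; the
	 * patch excludes that case via its earlier conditionals.
	 */
	if (tbp == NULL)
		TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
	else
		TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
}

int
main(void)
{
	struct buflists head = TAILQ_HEAD_INITIALIZER(head);
	long blocks[] = { 1, -1, 2, -2, 7 };	/* mixed data/metadata */
	struct fakebuf *bp;
	size_t i;

	for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
		bp = malloc(sizeof(*bp));
		bp->b_lblkno = blocks[i];
		if (bp->b_lblkno < 0)		/* metadata goes to the tail */
			TAILQ_INSERT_TAIL(&head, bp, b_vnbufs);
		else
			insert_before_metadata(&head, bp);
	}
	TAILQ_FOREACH(bp, &head, b_vnbufs)
		printf("%ld ", bp->b_lblkno);	/* prints: 1 2 7 -1 -2 */
	printf("\n");
	return (0);
}
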
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 76bd5843c369..059ca2a4787a 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
 		tbp = TAILQ_FIRST(listheadp);
 		if (tbp == NULL ||
 		    bp->b_lblkno == 0 ||
+		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
 		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
 			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
@@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
 		} else if (reassignbufmethod == 1) {
 			/*
 			 * New sorting algorithm, only handle sequential case,
-			 * otherwise guess.
+			 * otherwise append to end (but before metadata)
 			 */
 			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
 			    (tbp->b_xflags & BX_VNDIRTY)) {
+				/*
+				 * Found the best place to insert the buffer
+				 */
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortgood;
 			} else {
-				TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
+				/*
+				 * Missed, append to end, but before meta-data.
+				 * We know that the head buffer in the list is
+				 * not meta-data due to prior conditionals.
+				 *
+				 * Indirect effects: NFS second stage write
+				 * tends to wind up here, giving maximum
+				 * distance between the unstable write and the
+				 * commit rpc.
+				 */
+				tbp = TAILQ_LAST(listheadp, buflists);
+				while (tbp && tbp->b_lblkno < 0)
+					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
+				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortbad;
 			}
 		} else {
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index 8e99d9883c58..4b9dcec8c7ab 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
 				rabp->b_flags |= B_INVAL|B_ERROR;
 				vfs_unbusy_pages(rabp);
 				brelse(rabp);
+				break;
 			    }
-			} else
+			} else {
 			    brelse(rabp);
+			}
 		    }
 		}
 	}
@@ -497,8 +499,19 @@ again:
 	} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
 		bcount = np->n_size - (off_t)lbn * biosize;
 	}
-	if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
-		goto again;
+	if (bcount != biosize) {
+		switch(nfs_rslock(np, p)) {
+		case ENOLCK:
+			goto again;
+			/* not reached */
+		case EINTR:
+		case ERESTART:
+			return(EINTR);
+			/* not reached */
+		default:
+			break;
+		}
+	}
 
 	bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
@@ -785,8 +798,17 @@ restart:
 	 */
 	if ((ioflag & IO_APPEND) ||
 	    uio->uio_offset + uio->uio_resid > np->n_size) {
-		if (nfs_rslock(np, p) == ENOLCK)
+		switch(nfs_rslock(np, p)) {
+		case ENOLCK:
 			goto restart;
+			/* not reached */
+		case EINTR:
+		case ERESTART:
+			return(EINTR);
+			/* not reached */
+		default:
+			break;
+		}
 		haverslock = 1;
 	}
 
@@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
 	int slptimeo = 0;
 	int error;
 
+	/*
+	 * If no async daemons then return EIO to force caller to run the rpc
+	 * synchronously.
+	 */
 	if (nfs_numasync == 0)
 		return (EIO);
 
 	nmp = VFSTONFS(bp->b_vp->v_mount);
+
+	/*
+	 * Commits are usually short and sweet so let's save some cpu and
+	 * leave the async daemons for more important rpc's (such as reads
+	 * and writes).
+	 */
+	if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
+	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
+		return(EIO);
+	}
+
 again:
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
@@ -1244,7 +1281,8 @@ again:
 	 */
 	if (gotiod) {
 		/*
-		 * Ensure that the queue never grows too large.
+		 * Ensure that the queue never grows too large.  We still want
+		 * to asynchronize so we block rather than return EIO.
 		 */
 		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
 			NFS_DPF(ASYNCIO,
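
Both the read and write paths above now decode nfs_rslock()'s return value the same way: ENOLCK means the lock was broken out from under us and the whole operation should restart, while EINTR/ERESTART mean a signal arrived on an interruptible mount and the syscall should fail with EINTR. A self-contained sketch of that control flow, with try_lock() as a hypothetical stand-in for nfs_rslock():

#include <errno.h>
#include <stdio.h>

static int attempts;

/*
 * Stand-in for nfs_rslock(): fail once with ENOLCK (lock was being
 * drained), then succeed.  A real interruptible lock could also
 * return EINTR or ERESTART when a signal is caught.
 */
static int
try_lock(void)
{
	return (attempts++ == 0 ? ENOLCK : 0);
}

static int
locked_operation(void)
{
restart:
	switch (try_lock()) {
	case ENOLCK:
		goto restart;		/* lock vanished: re-evaluate state */
		/* not reached */
	case EINTR:
	case ERESTART:
		return (EINTR);		/* signal: abort the whole operation */
		/* not reached */
	default:
		break;			/* 0: lock acquired */
	}
	/* ... work under the lock, then release it ... */
	return (0);
}

int
main(void)
{
	int error = locked_operation();

	printf("locked_operation() = %d after %d attempts\n", error, attempts);
	return (0);
}
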
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
index 89cbdcec3efc..1de873908dfa 100644
--- a/sys/nfs/nfs_node.c
+++ b/sys/nfs/nfs_node.c
@@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
 	register struct vnode *vp;
 	struct vnode *nvp;
 	int error;
+	int rsflags;
+	struct nfsmount *nmp;
+
+	/*
+	 * Calculate nfs mount point and figure out whether the rslock should
+	 * be interruptible or not.
+	 */
+	nmp = VFSTONFS(mntp);
+	if (nmp->nm_flag & NFSMNT_INT)
+		rsflags = PCATCH;
+	else
+		rsflags = 0;
 
 retry:
 	nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@@ -180,7 +192,7 @@ loop:
 	np->n_fhp = &np->n_fh;
 	bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
 	np->n_fhsize = fhsize;
-	lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
+	lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
 	*npp = np;
 
 	if (nfs_node_hash_lock < 0)
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
index da18842f1042..15c623133270 100644
--- a/sys/nfs/nfs_syscalls.c
+++ b/sys/nfs/nfs_syscalls.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
 		    /* Take one off the front of the list */
 		    TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
 		    nmp->nm_bufqlen--;
-		    if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+		    if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
			nmp->nm_bufqwant = FALSE;
			wakeup(&nmp->nm_bufq);
 		    }
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
index 9ddb428a553a..c2d365b319f8 100644
--- a/sys/nfs/nfs_vfsops.c
+++ b/sys/nfs/nfs_vfsops.c
@@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
 	if (nmp->nm_acregmin > nmp->nm_acregmax)
 		nmp->nm_acregmin = nmp->nm_acregmax;
 
-	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
-		argp->maxgrouplist <= NFS_MAXGRPS)
-		nmp->nm_numgrps = argp->maxgrouplist;
-	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
-		argp->readahead <= NFS_MAXRAHEAD)
-		nmp->nm_readahead = argp->readahead;
-	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
-		argp->leaseterm <= NQ_MAXLEASE)
-		nmp->nm_leaseterm = argp->leaseterm;
-	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
-		argp->deadthresh <= NQ_NEVERDEAD)
-		nmp->nm_deadthresh = argp->deadthresh;
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
+		if (argp->maxgrouplist <= NFS_MAXGRPS)
+			nmp->nm_numgrps = argp->maxgrouplist;
+		else
+			nmp->nm_numgrps = NFS_MAXGRPS;
+	}
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
+		if (argp->readahead <= NFS_MAXRAHEAD)
+			nmp->nm_readahead = argp->readahead;
+		else
+			nmp->nm_readahead = NFS_MAXRAHEAD;
+	}
+	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
+		if (argp->leaseterm <= NQ_MAXLEASE)
+			nmp->nm_leaseterm = argp->leaseterm;
+		else
+			nmp->nm_leaseterm = NQ_MAXLEASE;
+	}
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
+		if (argp->deadthresh <= NQ_NEVERDEAD)
+			nmp->nm_deadthresh = argp->deadthresh;
+		else
+			nmp->nm_deadthresh = NQ_NEVERDEAD;
+	}
 
 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
 		    (nmp->nm_soproto != argp->proto));
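
The nfs_decode_args() rewrite above changes the handling of oversized mount arguments from "silently ignore" to "clamp to the maximum", so e.g. a readahead request above NFS_MAXRAHEAD now yields NFS_MAXRAHEAD instead of leaving the old value in place. The same rule as a stand-alone helper; the helper name and the NFS_MAXRAHEAD value here are illustrative only:

#include <stdio.h>

#define NFS_MAXRAHEAD	4	/* illustrative value, not the kernel's */

static int
clamp_arg(int requested, int minval, int maxval, int current)
{
	if (requested < minval)
		return (current);	/* out of range low: keep old setting */
	if (requested > maxval)
		return (maxval);	/* too large: clamp, don't ignore */
	return (requested);
}

int
main(void)
{
	int nm_readahead = 1;		/* pretend current mount setting */

	nm_readahead = clamp_arg(99, 0, NFS_MAXRAHEAD, nm_readahead);
	printf("readahead = %d\n", nm_readahead);	/* prints 4, not 1 */
	return (0);
}
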
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index 8e99d9883c58..4b9dcec8c7ab 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
 				rabp->b_flags |= B_INVAL|B_ERROR;
 				vfs_unbusy_pages(rabp);
 				brelse(rabp);
+				break;
 			    }
-			} else
+			} else {
 			    brelse(rabp);
+			}
 		    }
 		}
 	}
@@ -497,8 +499,19 @@ again:
 	} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
 		bcount = np->n_size - (off_t)lbn * biosize;
 	}
-	if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
-		goto again;
+	if (bcount != biosize) {
+		switch(nfs_rslock(np, p)) {
+		case ENOLCK:
+			goto again;
+			/* not reached */
+		case EINTR:
+		case ERESTART:
+			return(EINTR);
+			/* not reached */
+		default:
+			break;
+		}
+	}
 
 	bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
@@ -785,8 +798,17 @@ restart:
 	 */
 	if ((ioflag & IO_APPEND) ||
 	    uio->uio_offset + uio->uio_resid > np->n_size) {
-		if (nfs_rslock(np, p) == ENOLCK)
+		switch(nfs_rslock(np, p)) {
+		case ENOLCK:
 			goto restart;
+			/* not reached */
+		case EINTR:
+		case ERESTART:
+			return(EINTR);
+			/* not reached */
+		default:
+			break;
+		}
 		haverslock = 1;
 	}
 
@@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
 	int slptimeo = 0;
 	int error;
 
+	/*
+	 * If no async daemons then return EIO to force caller to run the rpc
+	 * synchronously.
+	 */
 	if (nfs_numasync == 0)
 		return (EIO);
 
 	nmp = VFSTONFS(bp->b_vp->v_mount);
+
+	/*
+	 * Commits are usually short and sweet so let's save some cpu and
+	 * leave the async daemons for more important rpc's (such as reads
+	 * and writes).
+	 */
+	if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
+	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
+		return(EIO);
+	}
+
 again:
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
@@ -1244,7 +1281,8 @@ again:
 	 */
 	if (gotiod) {
 		/*
-		 * Ensure that the queue never grows too large.
+		 * Ensure that the queue never grows too large.  We still want
+		 * to asynchronize so we block rather than return EIO.
 		 */
 		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
 			NFS_DPF(ASYNCIO,
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
index da18842f1042..15c623133270 100644
--- a/sys/nfsclient/nfs_nfsiod.c
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
 		    /* Take one off the front of the list */
 		    TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
 		    nmp->nm_bufqlen--;
-		    if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+		    if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
			nmp->nm_bufqwant = FALSE;
			wakeup(&nmp->nm_bufq);
 		    }
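
The nfssvc_iod() change above pairs with the producer side in nfs_asyncio(): producers block once nm_bufqlen reaches 2*nfs_numasync, but are now only woken after the queue drains back down to nfs_numasync, giving the wait/wakeup pair hysteresis instead of thrashing at a single boundary. A hypothetical pthread rendering of the same high/low watermark idea (all names invented, compile with -pthread):

#include <pthread.h>

#define NIOD	4		/* stand-in for nfs_numasync */
#define HIWAT	(2 * NIOD)	/* producers block at this depth... */
#define LOWAT	NIOD		/* ...and are woken again at this one */

static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcond = PTHREAD_COND_INITIALIZER;
static int qlen;		/* stand-in for nm_bufqlen */
static int qwant;		/* stand-in for nm_bufqwant */

/* Producer side: the analogue of queueing a buffer in nfs_asyncio(). */
static void
produce(void)
{
	pthread_mutex_lock(&qlock);
	while (qlen >= HIWAT) {
		qwant = 1;
		pthread_cond_wait(&qcond, &qlock);
	}
	qlen++;
	pthread_mutex_unlock(&qlock);
}

/* Consumer side: the analogue of nfssvc_iod() taking work off the queue. */
static void
consume(void)
{
	pthread_mutex_lock(&qlock);
	if (qlen > 0)
		qlen--;
	if (qwant && qlen <= LOWAT) {	/* drained to the low watermark */
		qwant = 0;
		pthread_cond_broadcast(&qcond);
	}
	pthread_mutex_unlock(&qlock);
}

int
main(void)
{
	int i;

	for (i = 0; i < HIWAT - 1; i++)	/* stays below HIWAT: no blocking */
		produce();
	while (qlen > 0)
		consume();
	return (0);
}
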
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
index 89cbdcec3efc..1de873908dfa 100644
--- a/sys/nfsclient/nfs_node.c
+++ b/sys/nfsclient/nfs_node.c
@@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
 	register struct vnode *vp;
 	struct vnode *nvp;
 	int error;
+	int rsflags;
+	struct nfsmount *nmp;
+
+	/*
+	 * Calculate nfs mount point and figure out whether the rslock should
+	 * be interruptible or not.
+	 */
+	nmp = VFSTONFS(mntp);
+	if (nmp->nm_flag & NFSMNT_INT)
+		rsflags = PCATCH;
+	else
+		rsflags = 0;
 
 retry:
 	nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@@ -180,7 +192,7 @@ loop:
 	np->n_fhp = &np->n_fh;
 	bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
 	np->n_fhsize = fhsize;
-	lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
+	lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
 	*npp = np;
 
 	if (nfs_node_hash_lock < 0)
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 9ddb428a553a..c2d365b319f8 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
 	if (nmp->nm_acregmin > nmp->nm_acregmax)
 		nmp->nm_acregmin = nmp->nm_acregmax;
 
-	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
-		argp->maxgrouplist <= NFS_MAXGRPS)
-		nmp->nm_numgrps = argp->maxgrouplist;
-	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
-		argp->readahead <= NFS_MAXRAHEAD)
-		nmp->nm_readahead = argp->readahead;
-	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
-		argp->leaseterm <= NQ_MAXLEASE)
-		nmp->nm_leaseterm = argp->leaseterm;
-	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
-		argp->deadthresh <= NQ_NEVERDEAD)
-		nmp->nm_deadthresh = argp->deadthresh;
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
+		if (argp->maxgrouplist <= NFS_MAXGRPS)
+			nmp->nm_numgrps = argp->maxgrouplist;
+		else
+			nmp->nm_numgrps = NFS_MAXGRPS;
+	}
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
+		if (argp->readahead <= NFS_MAXRAHEAD)
+			nmp->nm_readahead = argp->readahead;
+		else
+			nmp->nm_readahead = NFS_MAXRAHEAD;
+	}
+	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
+		if (argp->leaseterm <= NQ_MAXLEASE)
+			nmp->nm_leaseterm = argp->leaseterm;
+		else
+			nmp->nm_leaseterm = NQ_MAXLEASE;
+	}
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
+		if (argp->deadthresh <= NQ_NEVERDEAD)
+			nmp->nm_deadthresh = argp->deadthresh;
+		else
+			nmp->nm_deadthresh = NQ_NEVERDEAD;
+	}
 
 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
 		    (nmp->nm_soproto != argp->proto));
diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c
index da18842f1042..15c623133270 100644
--- a/sys/nfsserver/nfs_syscalls.c
+++ b/sys/nfsserver/nfs_syscalls.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
 		    /* Take one off the front of the list */
 		    TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
 		    nmp->nm_bufqlen--;
-		    if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+		    if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
			nmp->nm_bufqwant = FALSE;
			wakeup(&nmp->nm_bufq);
 		    }
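
One last idiom worth spelling out: the nfs_asyncio() heuristic earlier in this patch tests two buffer flags at once, and (bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT holds only when B_READ is clear and B_NEEDCOMMIT is set, i.e. for the commit phase of a write. A standalone illustration of that mask-and-match idiom (the flag values below are invented, not the kernel's):

#include <stdio.h>

#define B_READ		0x0001	/* illustrative values only */
#define B_NEEDCOMMIT	0x0002

static int
is_commit_write(int flags)
{
	/* Select the bits of interest, then require an exact pattern. */
	return ((flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT);
}

int
main(void)
{
	printf("%d\n", is_commit_write(B_NEEDCOMMIT));		/* 1 */
	printf("%d\n", is_commit_write(B_READ|B_NEEDCOMMIT));	/* 0 */
	printf("%d\n", is_commit_write(0));			/* 0 */
	return (0);
}
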