Asynchronized client-side nfs_commit. NFS commit operations were

previously issued synchronously even if async daemons (nfsiod's) were
    available.  The commit has been moved from the strategy code to the doio
    code in order to asynchronize it.

    Removed use of lastr in preparation for removal of vnode->v_lastr.  It
    has been replaced with seqcount, which is already supported by the system
    and, in fact, gives us a better heuristic for sequential detection than
    lastr ever did.

    Made major performance improvements to the server side commit.  The
    server previously fsync'd the entire file for each commit rpc.  The
    server now bawrite()s only those buffers related to the offset/size
    specified in the commit rpc.

    Note that we do not commit the meta-data yet.  This work still needs
    to be done.

    Note that a further optimization can be done (and has not yet been done)
    on the client: we can merge multiple potential commit rpc's into a
    single rpc with a greater file offset/size range and greatly reduce
    rpc traffic.

Reviewed by:	Alan Cox <alc@cs.rice.edu>, David Greenman <dg@root.com>
This commit is contained in:
dillon 1999-09-17 05:57:57 +00:00
parent 04d947f55f
commit ddc4feb90c
12 changed files with 250 additions and 34 deletions

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));

View File

@ -339,6 +339,7 @@ nfs_bioread(vp, uio, ioflag, cred)
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn, rabn;
int bcount;
int seqcount;
int nra, error = 0, n = 0, on = 0;
#ifdef DIAGNOSTIC
@ -350,6 +351,7 @@ nfs_bioread(vp, uio, ioflag, cred)
if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
return (EINVAL);
p = uio->uio_procp;
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
(void)nfs_fsinfo(nmp, vp, cred, p);
@ -357,6 +359,7 @@ nfs_bioread(vp, uio, ioflag, cred)
(uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
return (EFBIG);
biosize = vp->v_mount->mnt_stat.f_iosize;
seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
/*
* For nfs, cache consistency can only be maintained approximately.
* Although RFC1094 does not specify the criteria, the following is
@ -455,7 +458,7 @@ nfs_bioread(vp, uio, ioflag, cred)
* Start the read ahead(s), as required.
*/
if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead &&
for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
rabn = lbn + 1 + nra;
if (!incore(vp, rabn)) {
@ -521,8 +524,6 @@ nfs_bioread(vp, uio, ioflag, cred)
n = 0;
if (on < bcount)
n = min((unsigned)(bcount - on), uio->uio_resid);
vp->v_lastr = lbn;
break;
case VLNK:
nfsstats.biocache_readlinks++;
@ -1344,6 +1345,35 @@ nfs_doio(bp, cr, p)
bp->b_error = error;
}
} else {
/*
* If we only need to commit, try to commit
*/
if (bp->b_flags & B_NEEDCOMMIT) {
int retv;
off_t off;
off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
bp->b_flags |= B_WRITEINPROG;
retv = nfs_commit(
bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
bp->b_wcred, p);
bp->b_flags &= ~B_WRITEINPROG;
if (retv == 0) {
bp->b_dirtyoff = bp->b_dirtyend = 0;
bp->b_flags &= ~B_NEEDCOMMIT;
bp->b_resid = 0;
biodone(bp);
return (0);
}
if (retv == NFSERR_STALEWRITEVERF) {
nfs_clearcommit(bp->b_vp->v_mount);
}
}
/*
* Setup for actual write
*/
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;

View File

@ -96,6 +96,7 @@
#include <sys/stat.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -115,6 +116,8 @@
#define nfsdbprintf(info)
#endif
#define MAX_COMMIT_COUNT (1024 * 1024)
nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
NFFIFO, NFNON };
#ifndef NFS_NOSERVER
@ -133,6 +136,10 @@ SYSCTL_DECL(_vfs_nfs);
static int nfs_async;
SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
static int nfsrv_access __P((struct vnode *,int,struct ucred *,int,
struct proc *, int));
@ -3624,11 +3631,73 @@ nfsrv_commit(nfsd, slp, procp, mrq)
goto nfsmout;
}
for_ret = VOP_GETATTR(vp, &bfor, cred, procp);
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
if (cnt > MAX_COMMIT_COUNT) {
/*
* Give up and do the whole thing
*/
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
}
error = VOP_FSYNC(vp, cred, MNT_WAIT, procp);
} else {
/*
* Locate and synchronously write any buffers that fall
* into the requested range. Note: we are assuming that
* f_iosize is a power of 2.
*/
int iosize = vp->v_mount->mnt_stat.f_iosize;
int iomask = iosize - 1;
int s;
daddr_t lblkno;
/*
* Align to iosize boundary, super-align to page boundary.
*/
if (off & iomask) {
cnt += off & iomask;
off &= ~(u_quad_t)iomask;
}
if (off & PAGE_MASK) {
cnt += off & PAGE_MASK;
off &= ~(u_quad_t)PAGE_MASK;
}
lblkno = off / iosize;
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
}
s = splbio();
while (cnt > 0) {
struct buf *bp;
/*
* If we have a buffer and it is marked B_DELWRI we
* have to lock and write it. Otherwise the prior
* write is assumed to have already been committed.
*/
if ((bp = gbincore(vp, lblkno)) != NULL && (bp->b_flags & B_DELWRI)) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL);
continue; /* retry */
}
bremfree(bp);
bp->b_flags &= ~B_ASYNC;
VOP_BWRITE(bp->b_vp, bp);
++nfs_commit_miss;
}
++nfs_commit_blks;
if (cnt < iosize)
break;
cnt -= iosize;
++lblkno;
}
splx(s);
}
error = VOP_FSYNC(vp, cred, MNT_WAIT, procp);
aft_ret = VOP_GETATTR(vp, &aft, cred, procp);
vput(vp);
vp = NULL;

View File

@ -220,8 +220,6 @@ static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp,
struct vattr *vap));
@ -2587,9 +2585,9 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
/*
* Nfs Version 3 commit rpc
*/
static int
int
nfs_commit(vp, offset, cnt, cred, procp)
register struct vnode *vp;
struct vnode *vp;
u_quad_t offset;
int cnt;
struct ucred *cred;
@ -3076,8 +3074,10 @@ nfs_writebp(bp, force, procp)
{
int s;
int oldflags = bp->b_flags;
#if 0
int retv = 1;
off_t off;
#endif
if (BUF_REFCNT(bp) == 0)
panic("bwrite: buffer is not locked???");
@ -3101,12 +3101,16 @@ nfs_writebp(bp, force, procp)
curproc->p_stats->p_ru.ru_oublock++;
splx(s);
vfs_busy_pages(bp, 1);
#if 0
/*
* XXX removed, moved to nfs_doio XXX
*/
/*
* If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
* an actual write will have to be scheduled via. VOP_STRATEGY().
* If B_WRITEINPROG is already set, then push it with a write anyhow.
*/
vfs_busy_pages(bp, 1);
if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
bp->b_flags |= B_WRITEINPROG;
@ -3121,12 +3125,11 @@ nfs_writebp(bp, force, procp)
nfs_clearcommit(bp->b_vp->v_mount);
}
}
if (retv) {
if (force)
bp->b_flags |= B_WRITEINPROG;
BUF_KERNPROC(bp);
VOP_STRATEGY(bp->b_vp, bp);
}
#endif
if (force)
bp->b_flags |= B_WRITEINPROG;
BUF_KERNPROC(bp);
VOP_STRATEGY(bp->b_vp, bp);
if( (oldflags & B_ASYNC) == 0) {
int rtval = biowait(bp);

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));

View File

@ -339,6 +339,7 @@ nfs_bioread(vp, uio, ioflag, cred)
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn, rabn;
int bcount;
int seqcount;
int nra, error = 0, n = 0, on = 0;
#ifdef DIAGNOSTIC
@ -350,6 +351,7 @@ nfs_bioread(vp, uio, ioflag, cred)
if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
return (EINVAL);
p = uio->uio_procp;
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
(nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
(void)nfs_fsinfo(nmp, vp, cred, p);
@ -357,6 +359,7 @@ nfs_bioread(vp, uio, ioflag, cred)
(uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
return (EFBIG);
biosize = vp->v_mount->mnt_stat.f_iosize;
seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
/*
* For nfs, cache consistency can only be maintained approximately.
* Although RFC1094 does not specify the criteria, the following is
@ -455,7 +458,7 @@ nfs_bioread(vp, uio, ioflag, cred)
* Start the read ahead(s), as required.
*/
if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
for (nra = 0; nra < nmp->nm_readahead &&
for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
rabn = lbn + 1 + nra;
if (!incore(vp, rabn)) {
@ -521,8 +524,6 @@ nfs_bioread(vp, uio, ioflag, cred)
n = 0;
if (on < bcount)
n = min((unsigned)(bcount - on), uio->uio_resid);
vp->v_lastr = lbn;
break;
case VLNK:
nfsstats.biocache_readlinks++;
@ -1344,6 +1345,35 @@ nfs_doio(bp, cr, p)
bp->b_error = error;
}
} else {
/*
* If we only need to commit, try to commit
*/
if (bp->b_flags & B_NEEDCOMMIT) {
int retv;
off_t off;
off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
bp->b_flags |= B_WRITEINPROG;
retv = nfs_commit(
bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
bp->b_wcred, p);
bp->b_flags &= ~B_WRITEINPROG;
if (retv == 0) {
bp->b_dirtyoff = bp->b_dirtyend = 0;
bp->b_flags &= ~B_NEEDCOMMIT;
bp->b_resid = 0;
biodone(bp);
return (0);
}
if (retv == NFSERR_STALEWRITEVERF) {
nfs_clearcommit(bp->b_vp->v_mount);
}
}
/*
* Setup for actual write
*/
if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;

View File

@ -220,8 +220,6 @@ static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp,
struct vattr *vap));
@ -2587,9 +2585,9 @@ nfs_lookitup(dvp, name, len, cred, procp, npp)
/*
* Nfs Version 3 commit rpc
*/
static int
int
nfs_commit(vp, offset, cnt, cred, procp)
register struct vnode *vp;
struct vnode *vp;
u_quad_t offset;
int cnt;
struct ucred *cred;
@ -3076,8 +3074,10 @@ nfs_writebp(bp, force, procp)
{
int s;
int oldflags = bp->b_flags;
#if 0
int retv = 1;
off_t off;
#endif
if (BUF_REFCNT(bp) == 0)
panic("bwrite: buffer is not locked???");
@ -3101,12 +3101,16 @@ nfs_writebp(bp, force, procp)
curproc->p_stats->p_ru.ru_oublock++;
splx(s);
vfs_busy_pages(bp, 1);
#if 0
/*
* XXX removed, moved to nfs_doio XXX
*/
/*
* If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
* an actual write will have to be scheduled via. VOP_STRATEGY().
* If B_WRITEINPROG is already set, then push it with a write anyhow.
*/
vfs_busy_pages(bp, 1);
if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
bp->b_flags |= B_WRITEINPROG;
@ -3121,12 +3125,11 @@ nfs_writebp(bp, force, procp)
nfs_clearcommit(bp->b_vp->v_mount);
}
}
if (retv) {
if (force)
bp->b_flags |= B_WRITEINPROG;
BUF_KERNPROC(bp);
VOP_STRATEGY(bp->b_vp, bp);
}
#endif
if (force)
bp->b_flags |= B_WRITEINPROG;
BUF_KERNPROC(bp);
VOP_STRATEGY(bp->b_vp, bp);
if( (oldflags & B_ASYNC) == 0) {
int rtval = biowait(bp);

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));

View File

@ -96,6 +96,7 @@
#include <sys/stat.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -115,6 +116,8 @@
#define nfsdbprintf(info)
#endif
#define MAX_COMMIT_COUNT (1024 * 1024)
nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
NFFIFO, NFNON };
#ifndef NFS_NOSERVER
@ -133,6 +136,10 @@ SYSCTL_DECL(_vfs_nfs);
static int nfs_async;
SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
static int nfs_commit_blks;
static int nfs_commit_miss;
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
static int nfsrv_access __P((struct vnode *,int,struct ucred *,int,
struct proc *, int));
@ -3624,11 +3631,73 @@ nfsrv_commit(nfsd, slp, procp, mrq)
goto nfsmout;
}
for_ret = VOP_GETATTR(vp, &bfor, cred, procp);
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
if (cnt > MAX_COMMIT_COUNT) {
/*
* Give up and do the whole thing
*/
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
}
error = VOP_FSYNC(vp, cred, MNT_WAIT, procp);
} else {
/*
* Locate and synchronously write any buffers that fall
* into the requested range. Note: we are assuming that
* f_iosize is a power of 2.
*/
int iosize = vp->v_mount->mnt_stat.f_iosize;
int iomask = iosize - 1;
int s;
daddr_t lblkno;
/*
* Align to iosize boundary, super-align to page boundary.
*/
if (off & iomask) {
cnt += off & iomask;
off &= ~(u_quad_t)iomask;
}
if (off & PAGE_MASK) {
cnt += off & PAGE_MASK;
off &= ~(u_quad_t)PAGE_MASK;
}
lblkno = off / iosize;
if (vp->v_object &&
(vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
}
s = splbio();
while (cnt > 0) {
struct buf *bp;
/*
* If we have a buffer and it is marked B_DELWRI we
* have to lock and write it. Otherwise the prior
* write is assumed to have already been committed.
*/
if ((bp = gbincore(vp, lblkno)) != NULL && (bp->b_flags & B_DELWRI)) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL);
continue; /* retry */
}
bremfree(bp);
bp->b_flags &= ~B_ASYNC;
VOP_BWRITE(bp->b_vp, bp);
++nfs_commit_miss;
}
++nfs_commit_blks;
if (cnt < iosize)
break;
cnt -= iosize;
++lblkno;
}
splx(s);
}
error = VOP_FSYNC(vp, cred, MNT_WAIT, procp);
aft_ret = VOP_GETATTR(vp, &aft, cred, procp);
vput(vp);
vp = NULL;

View File

@ -604,6 +604,8 @@ int nfs_vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *,
int nfs_readrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_writerpc __P((struct vnode *, struct uio *, struct ucred *, int *,
int *));
int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt,
struct ucred *cred, struct proc *procp));
int nfs_readdirrpc __P((struct vnode *, struct uio *, struct ucred *));
int nfs_asyncio __P((struct buf *, struct ucred *, struct proc *));
int nfs_doio __P((struct buf *, struct ucred *, struct proc *));