diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 110ba7932f7b..b9dd64cbd299 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -127,7 +127,7 @@ SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, static int lobufspace; SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0, "Minimum amount of buffers we want to have"); -static int hibufspace; +int hibufspace; SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, "Maximum allowed value of bufspace (excluding buf_daemon)"); static int bufreusecnt; diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 17c3c3d9516a..4eb461dba075 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -873,6 +873,14 @@ nfs_write(struct vop_write_args *ap) */ if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { + /* + * Require non-blocking, synchronous writes to + * dirty files to inform the program it needs + * to fsync(2) explicitly. + */ + if (ioflag & IO_NDELAY) + return (EAGAIN); +flush_and_restart: np->n_attrstamp = 0; error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) @@ -953,6 +961,63 @@ restart: } biosize = vp->v_mount->mnt_stat.f_iosize; + /* + * Find all of this file's B_NEEDCOMMIT buffers. If our writes + * would exceed the local maximum per-file write commit size when + * combined with those, we must decide whether to flush, + * go synchronous, or return error. We don't bother checking + * IO_UNIT -- we just make all writes atomic anyway, as there's + * no point optimizing for something that really won't ever happen. + */ + if (!(ioflag & IO_SYNC)) { + int needrestart = 0; + if (nmp->nm_wcommitsize < uio->uio_resid) { + /* + * If this request could not possibly be completed + * without exceeding the maximum outstanding write + * commit size, see if we can convert it into a + * synchronous write operation. + */ + if (ioflag & IO_NDELAY) + return (EAGAIN); + ioflag |= IO_SYNC; + if (np->n_flag & NMODIFIED) + needrestart = 1; + } else if (np->n_flag & NMODIFIED) { + int wouldcommit = 0; + BO_LOCK(&vp->v_bufobj); + if (vp->v_bufobj.bo_dirty.bv_cnt != 0) { + TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, + b_bobufs) { + if (bp->b_flags & B_NEEDCOMMIT) + wouldcommit += bp->b_bcount; + } + } + BO_UNLOCK(&vp->v_bufobj); + /* + * Since we're not operating synchronously and + * bypassing the buffer cache, we are in a commit + * and holding all of these buffers whether + * transmitted or not. If not limited, this + * will lead to the buffer cache deadlocking, + * as no one else can flush our uncommitted buffers. + */ + wouldcommit += uio->uio_resid; + /* + * If we would initially exceed the maximum + * outstanding write commit size, flush and restart. + */ + if (wouldcommit > nmp->nm_wcommitsize) + needrestart = 1; + } + if (needrestart) { + if (haverslock) { + nfs_rsunlock(np, td); + haverslock = 0; + } + goto flush_and_restart; + } + } do { nfsstats.biocache_writes++; diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c index e7a3a1ac3a27..9cf9b2b45db5 100644 --- a/sys/nfsclient/nfs_vfsops.c +++ b/sys/nfsclient/nfs_vfsops.c @@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include @@ -633,6 +635,12 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp) else nmp->nm_readahead = NFS_MAXRAHEAD; } + if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { + if (argp->wcommitsize < nmp->nm_wsize) + nmp->nm_wcommitsize = nmp->nm_wsize; + else + nmp->nm_wcommitsize = argp->wcommitsize; + } if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) { if (argp->deadthresh <= NFS_MAXDEADTHRESH) nmp->nm_deadthresh = argp->deadthresh; @@ -815,6 +823,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; } + nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); nmp->nm_readdirsize = NFS_READDIRSIZE; nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_readahead = NFS_DEFRAHEAD; diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h index 8aa2392d8c94..7ebf1a058fb7 100644 --- a/sys/nfsclient/nfsargs.h +++ b/sys/nfsclient/nfsargs.h @@ -56,7 +56,7 @@ struct nfs_args { int retrans; /* times to retry send */ int maxgrouplist; /* Max. size of group list */ int readahead; /* # of blocks to readahead */ - int __pad1; /* was "leaseterm" */ + int wcommitsize; /* Max. write commit size in bytes */ int deadthresh; /* Retrans threshold */ char *hostname; /* server's name */ int acregmin; /* cache attrs for reg files min time */ @@ -80,7 +80,7 @@ struct nfs_args { #define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */ /* 0x400 free, was NFSMNT_KERB */ #define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */ -/* 0x1000 free, was NFSMNT_LEASETERM */ +#define NFSMNT_WCOMMITSIZE 0x00001000 /* set max write commit size */ #define NFSMNT_READAHEAD 0x00002000 /* set read ahead */ #define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */ #define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */ diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h index 7f23aaa01275..a8bcc1422367 100644 --- a/sys/nfsclient/nfsmount.h +++ b/sys/nfsclient/nfsmount.h @@ -74,6 +74,7 @@ struct nfsmount { int nm_wsize; /* Max size of write rpc */ int nm_readdirsize; /* Size of a readdir rpc */ int nm_readahead; /* Num. of blocks to readahead */ + int nm_wcommitsize; /* Max size of commit for write */ int nm_acdirmin; /* Directory attr cache min lifetime */ int nm_acdirmax; /* Directory attr cache max lifetime */ int nm_acregmin; /* Reg file attr cache min lifetime */ diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 8f68ff29acde..9f8fc2f4c91c 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -465,6 +465,7 @@ extern int nbuf; /* The number of buffer headers */ extern int maxswzone; /* Max KVA for swap structures */ extern int maxbcache; /* Max KVA for buffer cache */ extern int runningbufspace; +extern int hibufspace; extern int buf_maxio; /* nominal maximum I/O for buffer */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ diff --git a/sys/sys/bufobj.h b/sys/sys/bufobj.h index f938c80697e4..cab7fb002299 100644 --- a/sys/sys/bufobj.h +++ b/sys/sys/bufobj.h @@ -109,13 +109,13 @@ struct bufobj { #define BO_LOCK(bo) \ do { \ - KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \ + KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \ mtx_lock((bo)->bo_mtx); \ } while (0) #define BO_UNLOCK(bo) \ do { \ - KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \ + KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \ mtx_unlock((bo)->bo_mtx); \ } while (0)