Make MAXBCACHEBUF a tunable called vfs.maxbcachebuf.

By making MAXBCACHEBUF a tunable, it can be increased to allow for
larger read/write data sizes for the NFS client.
The tunable is limited to MAXPHYS, which is currently 128K.
Making MAXPHYS a tunable or increasing its value is being discussed,
since it would be nice to support a read/write data size of 1Mbyte
for the NFS client when mounting the AmazonEFS file service.

Reviewed by:	kib
MFC after:	2 weeks
Relnotes:	yes
Differential Revision:	https://reviews.freebsd.org/D10991
This commit is contained in:
Rick Macklem 2017-06-17 22:24:19 +00:00
parent d0ab0ec2fa
commit d1c5e240a8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=320062
5 changed files with 83 additions and 19 deletions

View File

@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult)
{
int rcvreserve, sndreserve;
int pktscale;
int pktscale, pktscalesav;
struct sockaddr *saddr;
struct ucred *origcred;
CLIENT *client;
@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
pktscale = 2;
if (pktscale > 64)
pktscale = 64;
pktscalesav = pktscale;
/*
* soreserve() can fail if sb_max is too small, so shrink pktscale
* and try again if there is an error.
@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
goto out;
}
do {
if (error != 0 && pktscale > 2)
if (error != 0 && pktscale > 2) {
if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
pktscale == pktscalesav)
printf("Consider increasing kern.ipc.maxsockbuf\n");
pktscale--;
}
if (nrp->nr_sotype == SOCK_DGRAM) {
if (nmp != NULL) {
sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) *
@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
if (nrp->nr_sotype != SOCK_STREAM)
panic("nfscon sotype");
if (nmp != NULL) {
sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
sizeof (u_int32_t)) * pktscale;
rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
sizeof (u_int32_t)) * pktscale;
} else {
sndreserve = rcvreserve = 1024 * pktscale;
}
}
error = soreserve(so, sndreserve, rcvreserve);
if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM &&
pktscale <= 2)
printf("Must increase kern.ipc.maxsockbuf or reduce"
" rsize, wsize\n");
} while (error != 0 && pktscale > 2);
soclose(so);
if (error) {

View File

@ -1016,7 +1016,7 @@ struct nfsreq {
};
#ifndef NFS_MAXBSIZE
#define NFS_MAXBSIZE MAXBCACHEBUF
#define NFS_MAXBSIZE (maxbcachebuf)
#endif
/*

View File

@ -4625,7 +4625,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
NFSPROC_T *p)
{
uint32_t crflags, *tl;
uint32_t crflags, maxval, *tl;
struct nfsrv_descript nfsd;
struct nfsrv_descript *nd = &nfsd;
int error, irdcnt;
@ -4643,8 +4643,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
/* Fill in fore channel attributes. */
NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
*tl++ = 0; /* Header pad size */
*tl++ = txdr_unsigned(100000); /* Max request size */
*tl++ = txdr_unsigned(100000); /* Max response size */
*tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
*tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
*tl++ = txdr_unsigned(4096); /* Max response size cached */
*tl++ = txdr_unsigned(20); /* Max operations */
*tl++ = txdr_unsigned(64); /* Max slots */
@ -4691,7 +4691,26 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
/* Get the fore channel slot count. */
NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
tl += 3; /* Skip the other counts. */
tl++; /* Skip the header pad size. */
/* Make sure nm_wsize is small enough. */
maxval = fxdr_unsigned(uint32_t, *tl++);
while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
if (nmp->nm_wsize > 8096)
nmp->nm_wsize /= 2;
else
break;
}
/* Make sure nm_rsize is small enough. */
maxval = fxdr_unsigned(uint32_t, *tl++);
while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
if (nmp->nm_rsize > 8096)
nmp->nm_rsize /= 2;
else
break;
}
sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
tl++;
sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);

View File

@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int);
static void bufkva_free(struct buf *);
static int buf_import(void *, void **, int, int);
static void buf_release(void *, void **, int);
static void maxbcachebuf_adjust(void);
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0,
SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
&unmapped_buf_allowed, 0,
"Permit the use of the unmapped i/o");
int maxbcachebuf = MAXBCACHEBUF;
SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0,
"Maximum size of a buffer cache block");
/*
* This lock synchronizes access to bd_request.
@ -846,6 +850,29 @@ bd_wakeup(void)
mtx_unlock(&bdlock);
}
/*
 * Adjust the maxbcachebuf tunable.
 *
 * Rounds maxbcachebuf down to a power of 2, raises it to MAXBSIZE if it
 * is smaller than that, and then caps it at MAXPHYS.  When booting
 * verbosely, the resulting value is printed if it differs from the
 * compile-time default MAXBCACHEBUF.
 */
static void
maxbcachebuf_adjust(void)
{
	int i;

	/*
	 * maxbcachebuf must be a power of 2 >= MAXBSIZE.
	 *
	 * Find the largest power of 2 that is <= maxbcachebuf.  Compare
	 * against maxbcachebuf / 2 instead of computing i * 2 so that a
	 * very large setting (>= 2^30) cannot overflow the signed int,
	 * which would be undefined behaviour and could loop forever.
	 * Settings below 4 (including negative ones) leave i at 2; the
	 * MAXBSIZE check below then raises the result.
	 */
	i = 2;
	while (i <= maxbcachebuf / 2)
		i *= 2;
	maxbcachebuf = i;
	if (maxbcachebuf < MAXBSIZE)
		maxbcachebuf = MAXBSIZE;
	if (maxbcachebuf > MAXPHYS)
		maxbcachebuf = MAXPHYS;
	if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
		printf("maxbcachebuf=%d\n", maxbcachebuf);
}
/*
* bd_speedup - speedup the buffer cache flushing code
*/
@ -893,6 +920,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est)
*/
physmem_est = physmem_est * (PAGE_SIZE / 1024);
maxbcachebuf_adjust();
/*
* The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
* For the first 64MB of ram nominally allocate sufficient buffers to
@ -1003,7 +1031,9 @@ bufinit(void)
struct buf *bp;
int i;
CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
KASSERT(maxbcachebuf >= MAXBSIZE,
("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
MAXBSIZE));
mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF);
mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF);
for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++)
@ -1050,7 +1080,7 @@ bufinit(void)
* PAGE_SIZE.
*/
maxbufspace = (long)nbuf * BKVASIZE;
hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
lobufspace = (hibufspace / 20) * 19; /* 95% */
bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;
@ -1062,9 +1092,9 @@ bufinit(void)
* The lower 1 MiB limit is the historical upper limit for
* hirunningspace.
*/
hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf),
16 * 1024 * 1024), 1024 * 1024);
lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf);
/*
* Limit the amount of malloc memory since it is wired permanently into
@ -3484,9 +3514,9 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
ASSERT_VOP_LOCKED(vp, "getblk");
if (size > MAXBCACHEBUF)
panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
MAXBCACHEBUF);
if (size > maxbcachebuf)
panic("getblk: size(%d) > maxbcachebuf(%d)\n", size,
maxbcachebuf);
if (!unmapped_buf_allowed)
flags &= ~(GB_UNMAPPED | GB_KVAALLOC);

View File

@ -244,9 +244,7 @@
* Filesystems can of course request smaller chunks. Actual
* backing memory uses a chunk size of a page (PAGE_SIZE).
* The default value here can be overridden on a per-architecture
* basis by defining it in <machine/param.h>. This should
* probably be done to increase its value, when MAXBCACHEBUF is
* defined as a larger value in <machine/param.h>.
* basis by defining it in <machine/param.h>.
*
* If you make BKVASIZE too small you risk seriously fragmenting
* the buffer KVM map which may slow things down a bit. If you
@ -266,6 +264,14 @@
#endif
#define BKVAMASK (BKVASIZE-1)
/*
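* This variable is set at boot time via the vfs.maxbcachebuf tunable and
* defaults to the value of MAXBCACHEBUF.  The kernel rounds the setting
* down to a power of 2 and keeps it between MAXBSIZE and MAXPHYS.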
*/
#ifdef _KERNEL
extern int maxbcachebuf;
#endif
/*
* MAXPATHLEN defines the longest permissible path length after expanding
* symbolic links. It is used to allocate a temporary buffer from the buffer