nfsd: Add sysctl to set maximum I/O size up to 1Mbyte

Since MAXPHYS now allows the FreeBSD NFS client
to do 1Mbyte I/O operations, add a sysctl called vfs.nfsd.srvmaxio
so that the maximum NFS server I/O size can be set up to 1Mbyte.
The Linux NFS client can also do 1Mbyte I/O operations.

The default of 128Kbytes for the maximum I/O size has
not been changed for two reasons:
- kern.ipc.maxsockbuf must be increased to support 1Mbyte I/O
- The limited benchmarking I can do actually shows a drop in I/O rate
  when the I/O size is above 256Kbytes.
However, daveb@spectralogic.com reports seeing an increase
in I/O rate for the 1Mbyte I/O size vs 128Kbytes using a Linux client.

Reviewed by:	asomers
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D30826
This commit is contained in:
Rick Macklem 2021-07-16 15:01:03 -07:00
parent db4d2d7222
commit ee29e6f311
7 changed files with 104 additions and 15 deletions

View File

@ -156,7 +156,7 @@
(t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0)
#define NFS_SRVMAXDATA(n) \
(((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? \
NFS_SRVMAXIO : NFS_V2MAXDATA)
nfs_srvmaxio : NFS_V2MAXDATA)
#define NFS64BITSSET 0xffffffffffffffffull
#define NFS64BITSMINUS1 0xfffffffffffffffeull

View File

@ -76,6 +76,7 @@ void (*nfsd_call_servertimer)(void) = NULL;
void (*ncl_call_invalcaches)(struct vnode *) = NULL;
vop_advlock_t *nfs_advlock_p = NULL;
vop_reclaim_t *nfs_reclaim_p = NULL;
uint32_t nfs_srvmaxio = NFS_SRVMAXIO;
int nfs_pnfsio(task_fn_t *, void *);
@ -303,11 +304,11 @@ nfsvno_getfs(struct nfsfsinfo *sip, int isdgram)
if (isdgram)
pref = NFS_MAXDGRAMDATA;
else
pref = NFS_SRVMAXIO;
sip->fs_rtmax = NFS_SRVMAXIO;
pref = nfs_srvmaxio;
sip->fs_rtmax = nfs_srvmaxio;
sip->fs_rtpref = pref;
sip->fs_rtmult = NFS_FABLKSIZE;
sip->fs_wtmax = NFS_SRVMAXIO;
sip->fs_wtmax = nfs_srvmaxio;
sip->fs_wtpref = pref;
sip->fs_wtmult = NFS_FABLKSIZE;
sip->fs_dtpref = pref;

View File

@ -85,6 +85,7 @@ extern volatile int nfsrv_devidcnt;
extern int nfscl_debuglevel;
extern struct nfsdevicehead nfsrv_devidhead;
extern struct nfsstatsv1 nfsstatsv1;
extern uint32_t nfs_srvmaxio;
SYSCTL_DECL(_vfs_nfs);
SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, CTLFLAG_RW,
@ -2201,7 +2202,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp,
NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
attrsum += NFSX_UNSIGNED;
i = fxdr_unsigned(int, *tl);
if (compare && !(*retcmpp) && i != NFS_SRVMAXIO)
if (compare && !(*retcmpp) && i != nfs_srvmaxio)
*retcmpp = NFSERR_NOTSAME;
break;
default:
@ -3012,7 +3013,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp,
case NFSATTRBIT_LAYOUTALIGNMENT:
case NFSATTRBIT_LAYOUTBLKSIZE:
NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(NFS_SRVMAXIO);
*tl = txdr_unsigned(nfs_srvmaxio);
retnum += NFSX_UNSIGNED;
break;
case NFSATTRBIT_XATTRSUPPORT:

View File

@ -73,7 +73,6 @@
*/
#define NFS_MAXPKTHDR 404
#define NFS_MAXXDR 4096
#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR)
#define NFS_MINPACKET 20
#define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */
#define NFSV4_MINORVERSION 0 /* V4 Minor version */

View File

@ -76,6 +76,9 @@ extern struct nfsdontlisthead nfsrv_dontlisthead;
extern volatile int nfsrv_dontlistlen;
extern volatile int nfsrv_devidcnt;
extern int nfsrv_maxpnfsmirror;
extern uint32_t nfs_srvmaxio;
extern int nfs_bufpackets;
extern u_long sb_max_adj;
struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
NFSDLOCKMUTEX;
NFSSTATESPINLOCK;
@ -195,6 +198,84 @@ SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
/*
* nfs_srvmaxio can only be increased and only when the nfsd threads are
* not running. The setting must be a power of 2, with the current limit of
* 1Mbyte.
*/
static int
sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
{
int error;
u_int newsrvmaxio;
uint64_t tval;
newsrvmaxio = nfs_srvmaxio;
error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
if (error != 0 || req->newptr == NULL)
return (error);
if (newsrvmaxio == nfs_srvmaxio)
return (0);
if (newsrvmaxio < nfs_srvmaxio) {
printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
return (EINVAL);
}
if (newsrvmaxio > 1048576) {
printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
return (EINVAL);
}
if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
return (EINVAL);
}
/*
* Check that kern.ipc.maxsockbuf is large enough for
* newsrviomax, given the setting of vfs.nfs.bufpackets.
*/
if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
sb_max_adj) {
/*
* Suggest vfs.nfs.bufpackets * maximum RPC message for
* sb_max_adj.
*/
tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
/*
* Convert suggested sb_max_adj value to a suggested
* sb_max value, which is what is set via kern.ipc.maxsockbuf.
* Perform the inverse calculation of (from uipc_sockbuf.c):
* sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
* (MSIZE + MCLBYTES);
* XXX If the calculation of sb_max_adj from sb_max changes,
* this calculation must be changed as well.
*/
tval *= (MSIZE + MCLBYTES); /* Brackets for readability. */
tval += MCLBYTES - 1; /* Round up divide. */
tval /= MCLBYTES;
printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
"%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
newsrvmaxio);
return (EINVAL);
}
NFSD_LOCK();
if (newnfs_numnfsd != 0) {
NFSD_UNLOCK();
printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
"threads are running\n");
return (EINVAL);
}
nfs_srvmaxio = newsrvmaxio;
NFSD_UNLOCK();
return (0);
}
/*
 * Register the read/write unsigned integer sysctl "srvmaxio" under the
 * _vfs_nfsd node. All reads and writes go through sysctl_srvmaxio(),
 * which validates any new value before storing it in nfs_srvmaxio.
 */
SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
#define MAX_REORDERED_RPC 16
#define NUM_HEURISTIC 1031
#define NHUSE_INIT 64

View File

@ -66,6 +66,7 @@ extern u_long sb_max_adj;
extern int nfsrv_pnfsatime;
extern int nfsrv_maxpnfsmirror;
extern int nfs_maxcopyrange;
extern uint32_t nfs_srvmaxio;
static int nfs_async = 0;
SYSCTL_DECL(_vfs_nfsd);
@ -1023,7 +1024,7 @@ nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram,
lop->lo_end = NFS64BITSSET;
}
if (retlen > NFS_SRVMAXIO || retlen < 0)
if (retlen > nfs_srvmaxio || retlen < 0)
nd->nd_repstat = EIO;
if (vnode_vtype(vp) != VREG && !nd->nd_repstat) {
if (nd->nd_flag & ND_NFSV3)
@ -4417,6 +4418,7 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
struct nfsdsession *sep = NULL;
uint32_t rdmacnt;
struct thread *p = curthread;
static bool do_printf = true;
if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
goto nfsmout;
@ -4438,12 +4440,16 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) {
sep->sess_maxreq = sb_max_adj - NFS_MAXXDR;
printf("Consider increasing kern.ipc.maxsockbuf\n");
if (do_printf)
printf("Consider increasing kern.ipc.maxsockbuf\n");
do_printf = false;
}
sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) {
sep->sess_maxresp = sb_max_adj - NFS_MAXXDR;
printf("Consider increasing kern.ipc.maxsockbuf\n");
if (do_printf)
printf("Consider increasing kern.ipc.maxsockbuf\n");
do_printf = false;
}
sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);

View File

@ -42,6 +42,7 @@ struct nfsv4lock nfsv4rootfs_lock;
time_t nfsdev_time = 0;
int nfsrv_layouthashsize;
volatile int nfsrv_layoutcnt = 0;
extern uint32_t nfs_srvmaxio;
extern int newnfs_numnfsd;
extern struct nfsstatsv1 nfsstatsv1;
@ -6898,7 +6899,7 @@ nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
/* Set the stripe size to the maximum I/O size. */
*tl++ = txdr_unsigned(NFS_SRVMAXIO & NFSFLAYUTIL_STRIPE_MASK);
*tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
*tl++ = 0; /* 1st stripe index. */
pattern_offset = 0;
txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
@ -7964,13 +7965,13 @@ nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
*tl++ = txdr_unsigned(2); /* Two NFS Versions. */
*tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
*tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
*tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */
*tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */
*tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
*tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
*tl++ = newnfs_true; /* Tightly coupled. */
*tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
*tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
*tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max rsize. */
*tl++ = txdr_unsigned(NFS_SRVMAXIO); /* DS max wsize. */
*tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
*tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
*tl = newnfs_true; /* Tightly coupled. */
ds->nfsdev_hostnamelen = strlen(dnshost);