nfscl: Add a Linux compatible "nconnect" mount option

Linux has had an "nconnect" NFS mount option for some time.
It specifies that N (up to 16) TCP connections are to be created for a mount,
instead of just one TCP connection.

A discussion on freebsd-net@ indicated that this could improve
client<-->server network bandwidth, if either the client or server
have one of the following:
- multiple network ports aggregated together with lagg/lacp.
- a fast NIC that is using multiple queues
It does result in using more IP port#s and might increase server
peak load for a client.

One difference from the Linux implementation is that this implementation
uses the first TCP connection for all RPCs composed of small messages
and uses the additional TCP connections for RPCs that normally have
large messages (Read/Readdir/Write).  The Linux implementation spreads
all RPCs across all TCP connections in a round robin fashion, whereas
this implementation spreads Read/Readdir/Write across the additional
TCP connections in a round robin fashion.

Reviewed by:	markj
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D30970
This commit is contained in:
Rick Macklem 2021-07-08 17:39:04 -07:00
parent 25a66f1fb1
commit 1e0a518d65
7 changed files with 134 additions and 33 deletions

View File

@ -167,7 +167,8 @@ static int nfsv2_procid[NFS_V3NPROCS] = {
*/
int
newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls)
struct ucred *cred, NFSPROC_T *p, int callback_retry_mult, bool dotls,
struct __rpc_client **clipp)
{
int rcvreserve, sndreserve;
int pktscale, pktscalesav;
@ -420,15 +421,22 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
}
/*
* clipp is &nrp->nr_client or the address of an nmp->nm_aconn[] entry.
* The latter case is for additional connections specified by the
* "nconnect" mount option. nr_mtx etc is used for these additional
* connections, as well as nr_client in the nfssockreq
* structure for the mount.
*/
mtx_lock(&nrp->nr_mtx);
if (nrp->nr_client != NULL) {
if (*clipp != NULL) {
mtx_unlock(&nrp->nr_mtx);
/*
* Someone else already connected.
*/
CLNT_RELEASE(client);
} else {
nrp->nr_client = client;
*clipp = client;
/*
* Protocols that do not require connections may be optionally
* left unconnected for servers that reply from a port other
@ -453,18 +461,34 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
* NFS disconnect. Clean up and unlink.
*/
void
newnfs_disconnect(struct nfssockreq *nrp)
newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp)
{
CLIENT *client;
CLIENT *client, *aconn[NFS_MAXNCONN - 1];
int i;
mtx_lock(&nrp->nr_mtx);
if (nrp->nr_client != NULL) {
client = nrp->nr_client;
nrp->nr_client = NULL;
if (nmp != NULL && nmp->nm_aconnect > 0) {
for (i = 0; i < nmp->nm_aconnect; i++) {
aconn[i] = nmp->nm_aconn[i];
nmp->nm_aconn[i] = NULL;
}
}
mtx_unlock(&nrp->nr_mtx);
rpc_gss_secpurge_call(client);
CLNT_CLOSE(client);
CLNT_RELEASE(client);
if (nmp != NULL && nmp->nm_aconnect > 0) {
for (i = 0; i < nmp->nm_aconnect; i++) {
if (aconn[i] != NULL) {
rpc_gss_secpurge_call(aconn[i]);
CLNT_CLOSE(aconn[i]);
CLNT_RELEASE(aconn[i]);
}
}
}
} else {
mtx_unlock(&nrp->nr_mtx);
}
@ -565,7 +589,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
int error = 0, usegssname = 0, secflavour = AUTH_SYS;
int freeslot, maxslot, reterr, slotpos, timeo;
u_int16_t procnum;
u_int trylater_delay = 1;
u_int nextconn, trylater_delay = 1;
struct nfs_feedback_arg nf;
struct timeval timo;
AUTH *auth;
@ -577,6 +601,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
struct ucred *authcred;
struct nfsclsession *sep;
uint8_t sessionid[NFSX_V4SESSIONID];
bool nextconn_set;
sep = dssep;
if (xidp != NULL)
@ -602,12 +627,24 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
}
/*
* XXX if not already connected call nfs_connect now. Longer
* term, change nfs_mount to call nfs_connect unconditionally
* and let clnt_reconnect_create handle reconnects.
* If not already connected call newnfs_connect now.
*/
if (nrp->nr_client == NULL)
newnfs_connect(nmp, nrp, cred, td, 0, false);
newnfs_connect(nmp, nrp, cred, td, 0, false, &nrp->nr_client);
nextconn_set = false;
if (nmp != NULL && nmp->nm_aconnect > 0 &&
(nd->nd_procnum == NFSPROC_READ ||
nd->nd_procnum == NFSPROC_READDIR ||
nd->nd_procnum == NFSPROC_READDIRPLUS ||
nd->nd_procnum == NFSPROC_WRITE)) {
nextconn = atomic_fetchadd_int(&nmp->nm_nextaconn, 1);
nextconn %= nmp->nm_aconnect;
nextconn_set = true;
if (nmp->nm_aconn[nextconn] == NULL)
newnfs_connect(nmp, nrp, cred, td, 0, false,
&nmp->nm_aconn[nextconn]);
}
/*
* For a client side mount, nmp is != NULL and clp == NULL. For
@ -830,6 +867,19 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
if (clp != NULL && sep != NULL)
stat = clnt_bck_call(nrp->nr_client, &ext, procnum,
nd->nd_mreq, &nd->nd_mrep, timo, sep->nfsess_xprt);
else if (nextconn_set)
/*
* When there are multiple TCP connections, send the
* RPCs with large messages on the alternate TCP
* connection(s) in a round robin fashion.
* The small RPC messages are sent on the default
* TCP connection because they do not require much
* network bandwidth and separating them from the
* large RPC messages avoids them getting "log jammed"
* behind several large RPC messages.
*/
stat = CLNT_CALL_MBUF(nmp->nm_aconn[nextconn],
&ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo);
else
stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum,
nd->nd_mreq, &nd->nd_mrep, timo);

View File

@ -3624,7 +3624,8 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p)
}
rp->nr_vers = RPCNFSUSERD_VERS;
if (error == 0)
error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0, false);
error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0, false,
&rp->nr_client);
if (error == 0) {
NFSLOCKNAMEID();
nfsrv_nfsuserd = RUNNING;
@ -3658,7 +3659,7 @@ nfsrv_nfsuserddelport(void)
msleep(&nfsrv_userdupcalls, NFSNAMEIDMUTEXPTR, PVFS,
"nfsupcalls", 0);
NFSUNLOCKNAMEID();
newnfs_disconnect(&nfsrv_nfsuserdsock);
newnfs_disconnect(NULL, &nfsrv_nfsuserdsock);
free(nfsrv_nfsuserdsock.nr_nam, M_SONAME);
NFSLOCKNAMEID();
nfsrv_nfsuserd = NOTRUNNING;

View File

@ -772,8 +772,8 @@ int newnfs_request(struct nfsrv_descript *, struct nfsmount *,
struct ucred *, u_int32_t, u_int32_t, u_char *, int, u_int64_t *,
struct nfsclsession *);
int newnfs_connect(struct nfsmount *, struct nfssockreq *,
struct ucred *, NFSPROC_T *, int, bool);
void newnfs_disconnect(struct nfssockreq *);
struct ucred *, NFSPROC_T *, int, bool, struct __rpc_client **);
void newnfs_disconnect(struct nfsmount *, struct nfssockreq *);
int newnfs_sigintr(struct nfsmount *, NFSPROC_T *);
/* nfs_nfsdkrpc.c */

View File

@ -5610,7 +5610,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
* unmount, but I did it anyhow.
*/
nrp->nr_cred = crhold(nmp->nm_sockreq.nr_cred);
error = newnfs_connect(nmp, nrp, NULL, p, 0, false);
error = newnfs_connect(nmp, nrp, NULL, p, 0, false, &nrp->nr_client);
NFSCL_DEBUG(3, "DS connect=%d\n", error);
dsp = NULL;
@ -5628,7 +5628,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
} while (error == NFSERR_MINORVERMISMATCH &&
firsttry++ == 0);
if (error != 0)
newnfs_disconnect(nrp);
newnfs_disconnect(NULL, nrp);
} else {
dsp = malloc(sizeof(struct nfsclds), M_NFSCLDS,
M_WAITOK | M_ZERO);
@ -5656,7 +5656,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
* If there is already a session for this
* server, use it.
*/
(void)newnfs_disconnect(nrp);
newnfs_disconnect(NULL, nrp);
nfscl_freenfsclds(dsp);
*dspp = tdsp;
return (0);
@ -5688,7 +5688,7 @@ nfsrpc_fillsa(struct nfsmount *nmp, struct sockaddr_in *sin,
NFSUNLOCKMNT(nmp);
*dspp = dsp;
} else if (dsp != NULL) {
newnfs_disconnect(nrp);
newnfs_disconnect(NULL, nrp);
nfscl_freenfsclds(dsp);
}
return (error);

View File

@ -118,7 +118,7 @@ static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
static int mountnfs(struct nfs_args *, struct mount *,
struct sockaddr *, char *, u_char *, int, u_char *, int,
u_char *, int, struct vnode **, struct ucred *,
struct thread *, int, int, int, uint32_t, char *);
struct thread *, int, int, int, uint32_t, char *, int);
static void nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
struct sockaddr_storage *, int *, off_t *,
struct timeval *);
@ -548,7 +548,7 @@ nfs_mountdiskless(char *path,
nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL)) != 0) {
NFS_DEFAULT_NEGNAMETIMEO, 0, 0, NULL, 0)) != 0) {
printf("nfs_mountroot: mount %s on /: %d\n", path, error);
return (error);
}
@ -715,14 +715,14 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
haslock = 1;
}
if (!error) {
newnfs_disconnect(&nmp->nm_sockreq);
newnfs_disconnect(nmp, &nmp->nm_sockreq);
if (haslock)
newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
nmp->nm_sotype = argp->sotype;
nmp->nm_soproto = argp->proto;
if (nmp->nm_sotype == SOCK_DGRAM)
while (newnfs_connect(nmp, &nmp->nm_sockreq,
cred, td, 0, false)) {
cred, td, 0, false, &nmp->nm_sockreq.nr_client)) {
printf("newnfs_args: retrying connect\n");
(void) nfs_catnap(PSOCK, 0, "nfscon");
}
@ -750,7 +750,7 @@ static const char *nfs_opts[] = { "from", "nfs_args",
"resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
"pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname",
"pnfs", "wcommitsize", "oneopenown", "tls", "tlscertname", "nconnect",
NULL };
/*
@ -902,6 +902,7 @@ nfs_mount(struct mount *mp)
krbnamelen, srvkrbnamelen;
size_t hstlen;
uint32_t newflag;
int aconn = 0;
has_nfs_args_opt = 0;
has_nfs_from_opt = 0;
@ -1192,6 +1193,20 @@ nfs_mount(struct mount *mp)
goto out;
}
}
if (vfs_getopt(mp->mnt_optnew, "nconnect", (void **)&opt, NULL) ==
0) {
ret = sscanf(opt, "%d", &aconn);
if (ret != 1 || aconn < 1 || aconn > NFS_MAXNCONN) {
vfs_mount_error(mp, "illegal nconnect: %s", opt);
error = EINVAL;
goto out;
}
/*
* Setting nconnect=1 is a no-op, allowed so that
* the option can be used in a Linux compatible way.
*/
aconn--;
}
if (vfs_getopt(mp->mnt_optnew, "sec",
(void **) &secname, NULL) == 0)
nfs_sec_name(secname, &args.flags);
@ -1359,10 +1374,25 @@ nfs_mount(struct mount *mp)
}
}
if (aconn > 0 && (args.sotype != SOCK_STREAM ||
(args.flags & NFSMNT_NFSV4) == 0 || minvers == 0)) {
/*
* RFC 5661 requires that an NFSv4.1/4.2 server
* send an RPC reply on the same TCP connection
* as the one it received the request on.
* This property is required for "nconnect" and
* might not be the case for NFSv3 or NFSv4.0 servers.
*/
vfs_mount_error(mp, "nconnect should only be used "
"for NFSv4.1/4.2 mounts");
error = EINVAL;
goto out;
}
args.fh = nfh;
error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
nametimeo, negnametimeo, minvers, newflag, tlscertname);
nametimeo, negnametimeo, minvers, newflag, tlscertname, aconn);
out:
if (!error) {
MNT_ILOCK(mp);
@ -1410,7 +1440,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
int minvers, uint32_t newflag, char *tlscertname)
int minvers, uint32_t newflag, char *tlscertname, int aconn)
{
struct nfsmount *nmp;
struct nfsnode *np;
@ -1577,7 +1607,8 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
else
nmp->nm_sockreq.nr_vers = NFS_VER2;
if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false)))
if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0, false,
&nmp->nm_sockreq.nr_client)))
goto bad;
/* For NFSv4, get the clientid now. */
if ((argp->flags & NFSMNT_NFSV4) != 0) {
@ -1586,6 +1617,12 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
NFSCL_DEBUG(3, "aft getcl=%d\n", error);
if (error != 0)
goto bad;
if (aconn > 0 && nmp->nm_minorvers == 0) {
vfs_mount_error(mp, "nconnect should only be used "
"for NFSv4.1/4.2 mounts");
error = EINVAL;
goto bad;
}
}
if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
@ -1680,6 +1717,10 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
MNT_IUNLOCK(mp);
}
/* Can now allow additional connections. */
if (aconn > 0)
nmp->nm_aconnect = aconn;
/*
* Lose the lock but keep the ref.
*/
@ -1692,7 +1733,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
bad:
if (clp != NULL)
nfscl_clientrelease(clp);
newnfs_disconnect(&nmp->nm_sockreq);
newnfs_disconnect(NULL, &nmp->nm_sockreq);
crfree(nmp->nm_sockreq.nr_cred);
if (nmp->nm_sockreq.nr_auth != NULL)
AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
@ -1707,7 +1748,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
dsp->nfsclds_sockp != NULL)
newnfs_disconnect(dsp->nfsclds_sockp);
newnfs_disconnect(NULL, dsp->nfsclds_sockp);
nfscl_freenfsclds(dsp);
}
free(nmp->nm_tlscertname, M_NEWNFSMNT);
@ -1793,7 +1834,7 @@ nfs_unmount(struct mount *mp, int mntflags)
msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
mtx_unlock(&nmp->nm_mtx);
newnfs_disconnect(&nmp->nm_sockreq);
newnfs_disconnect(nmp, &nmp->nm_sockreq);
crfree(nmp->nm_sockreq.nr_cred);
free(nmp->nm_nam, M_SONAME);
if (nmp->nm_sockreq.nr_auth != NULL)
@ -1803,7 +1844,7 @@ nfs_unmount(struct mount *mp, int mntflags)
TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
dsp->nfsclds_sockp != NULL)
newnfs_disconnect(dsp->nfsclds_sockp);
newnfs_disconnect(NULL, dsp->nfsclds_sockp);
nfscl_freenfsclds(dsp);
}
free(nmp->nm_tlscertname, M_NEWNFSMNT);
@ -2067,6 +2108,7 @@ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
&blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
&buf, &blen);
nfscl_printoptval(nmp, nmp->nm_aconnect + 1, ",nconnect", &buf, &blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
&blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,

View File

@ -39,6 +39,9 @@
#include <nfs/nfs_mountcommon.h>
/* Maximum value for the "nconnect" mount option. */
#define NFS_MAXNCONN 16
/*
* Mount structure.
* One allocated on every NFS mount.
@ -81,6 +84,11 @@ struct nfsmount {
u_int64_t nm_clval; /* identifies which clientid */
u_int64_t nm_fsid[2]; /* NFSv4 fsid */
int nm_minorvers; /* Minor version # for NFSv4 */
u_int nm_aconnect; /* additional TCP connections */
u_int nm_nextaconn; /* Next nm_aconn[] to use */
/* unclipped, wraps to 0 */
struct __rpc_client *nm_aconn[NFS_MAXNCONN - 1]; /* Additional nconn */
/* Locked via nm_sockreq.nr_mtx */
u_int16_t nm_krbnamelen; /* Krb5 host principal, if any */
u_int16_t nm_dirpathlen; /* and mount dirpath, for V4 */
u_int16_t nm_srvkrbnamelen; /* and the server's target name */

View File

@ -1368,7 +1368,7 @@ nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
NULL, 0, NULL, NULL, NULL, 0, p);
}
#endif
newnfs_disconnect(&clp->lc_req);
newnfs_disconnect(NULL, &clp->lc_req);
free(clp->lc_req.nr_nam, M_SONAME);
NFSFREEMUTEX(&clp->lc_req.nr_mtx);
free(clp->lc_stateid, M_NFSDCLIENT);
@ -4577,10 +4577,10 @@ nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
nfsrv_freesession(sep, NULL);
} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
error = newnfs_connect(NULL, &clp->lc_req, cred,
NULL, 1, dotls);
NULL, 1, dotls, &clp->lc_req.nr_client);
else
error = newnfs_connect(NULL, &clp->lc_req, cred,
NULL, 3, dotls);
NULL, 3, dotls, &clp->lc_req.nr_client);
}
newnfs_sndunlock(&clp->lc_req.nr_lock);
NFSD_DEBUG(4, "aft sndunlock=%d\n", error);