Add an NFSv4.1 mount option for "use one openowner".

Some NFSv4.1 servers such as AmazonEFS can only support a small fixed number
of open_owner4s. This patch adds a mount option called "oneopenown" that
can be used for NFSv4.1 mounts to make the client do all Opens with the
same open_owner4 string. This option can only be used with NFSv4.1 and
may not work correctly when Delegations are in use.

Reported by:	cperciva
Tested by:	cperciva
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D8988
This commit is contained in:
rmacklem 2017-04-13 21:54:19 +00:00
parent c0fa855ca6
commit 6a12f641aa
7 changed files with 95 additions and 24 deletions

@ -502,8 +502,8 @@ int nfscl_open(vnode_t, u_int8_t *, int, u_int32_t, int,
int *, int *, int);
int nfscl_getstateid(vnode_t, u_int8_t *, int, u_int32_t, int, struct ucred *,
NFSPROC_T *, nfsv4stateid_t *, void **);
void nfscl_ownerrelease(struct nfsclowner *, int, int, int);
void nfscl_openrelease(struct nfsclopen *, int, int);
void nfscl_ownerrelease(struct nfsmount *, struct nfsclowner *, int, int, int);
void nfscl_openrelease(struct nfsmount *, struct nfsclopen *, int, int);
int nfscl_getcl(struct mount *, struct ucred *, NFSPROC_T *, int,
struct nfsclclient **);
struct nfsclclient *nfscl_findcl(struct nfsmount *);

@ -929,6 +929,8 @@ int newnfs_realign(struct mbuf **, int);
#define NFSHASNOLAYOUTCOMMIT(n) ((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT)
#define NFSHASSESSPERSIST(n) ((n)->nm_state & NFSSTA_SESSPERSIST)
#define NFSHASPNFS(n) ((n)->nm_state & NFSSTA_PNFS)
/*
 * Non-zero only when the mount has NFSMNT_ONEOPENOWN set in nm_flag and
 * nm_minorvers is greater than zero; the flag alone is not sufficient.
 */
#define NFSHASONEOPENOWN(n) (((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 && \
(n)->nm_minorvers > 0)
/*
* Gets the stats field out of the mount structure.

@ -634,7 +634,7 @@ nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
struct proc *p;
if (id == NULL) {
printf("NULL id\n");
/* Return the single open_owner of all 0 bytes. */
bzero(cp, NFSV4CL_LOCKNAMELEN);
return;
}
@ -1255,7 +1255,14 @@ nfscl_procdoesntexist(u_int8_t *own)
} tl;
struct proc *p;
pid_t pid;
int ret = 0;
int i, ret = 0;
/* For the single open_owner of all 0 bytes, just return 0. */
for (i = 0; i < NFSV4CL_LOCKNAMELEN; i++)
if (own[i] != 0)
break;
if (i == NFSV4CL_LOCKNAMELEN)
return (0);
tl.cval[0] = *own++;
tl.cval[1] = *own++;

@ -347,7 +347,7 @@ else printf(" fhl=0\n");
*/
if (!error)
op->nfso_opencnt++;
nfscl_openrelease(op, error, newone);
nfscl_openrelease(nmp, op, error, newone);
if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
error == NFSERR_BADSESSION) {
@ -1893,7 +1893,7 @@ nfsrpc_create(vnode_t dvp, char *name, int namelen, struct vattr *vap,
if (dp != NULL)
(void) nfscl_deleg(nmp->nm_mountp, owp->nfsow_clp,
(*nfhpp)->nfh_fh, (*nfhpp)->nfh_len, cred, p, &dp);
nfscl_ownerrelease(owp, error, newone, unlocked);
nfscl_ownerrelease(nmp, owp, error, newone, unlocked);
if (error == NFSERR_GRACE || error == NFSERR_STALECLIENTID ||
error == NFSERR_STALEDONTRECOVER || error == NFSERR_DELAY ||
error == NFSERR_BADSESSION) {
@ -2198,7 +2198,7 @@ nfsrpc_createv4(vnode_t dvp, char *name, int namelen, struct vattr *vap,
error = ret;
}
}
nfscl_openrelease(op, error, newone);
nfscl_openrelease(nmp, op, error, newone);
*unlockedp = 1;
}
if (nd->nd_repstat != 0 && error == 0)

@ -247,7 +247,6 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
* If none found, add the new one or return error, depending upon
* "create".
*/
nfscl_filllockowner(p->td_proc, own, F_POSIX);
NFSLOCKCLSTATE();
dp = NULL;
/* First check the delegation list */
@ -264,10 +263,17 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
}
}
if (dp != NULL)
if (dp != NULL) {
nfscl_filllockowner(p->td_proc, own, F_POSIX);
ohp = &dp->nfsdl_owner;
else
} else {
/* For NFSv4.1 and this option, use a single open_owner. */
if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
nfscl_filllockowner(NULL, own, F_POSIX);
else
nfscl_filllockowner(p->td_proc, own, F_POSIX);
ohp = &clp->nfsc_owner;
}
/* Now, search for an openowner */
LIST_FOREACH(owp, ohp, nfsow_list) {
if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN))
@ -300,9 +306,24 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
/*
* Serialize modifications to the open owner for multiple threads
* within the same process using a read/write sleep lock.
* For NFSv4.1 and a single OpenOwner, allow concurrent open operations
* by acquiring a shared lock. The close operations still use an
* exclusive lock for this case.
*/
if (lockit)
nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
if (lockit != 0) {
if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) {
/*
* Get a shared lock on the OpenOwner, but first
* wait for any pending exclusive lock, so that the
* exclusive locker gets priority.
*/
nfsv4_lock(&owp->nfsow_rwlock, 0, NULL,
NFSCLSTATEMUTEXPTR, NULL);
nfsv4_getref(&owp->nfsow_rwlock, NULL,
NFSCLSTATEMUTEXPTR, NULL);
} else
nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR);
}
NFSUNLOCKCLSTATE();
if (nowp != NULL)
FREE((caddr_t)nowp, M_NFSCLOWNER);
@ -545,7 +566,10 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
* If p != NULL, we want to search the parentage tree
* for a matching OpenOwner and use that.
*/
nfscl_filllockowner(p->td_proc, own, F_POSIX);
if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
nfscl_filllockowner(NULL, own, F_POSIX);
else
nfscl_filllockowner(p->td_proc, own, F_POSIX);
lp = NULL;
error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own,
mode, &lp, &op);
@ -679,15 +703,19 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
* with the open owner.
*/
APPLESTATIC void
nfscl_ownerrelease(struct nfsclowner *owp, __unused int error,
__unused int candelete, int unlocked)
nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
__unused int error, __unused int candelete, int unlocked)
{
if (owp == NULL)
return;
NFSLOCKCLSTATE();
if (!unlocked)
nfscl_lockunlock(&owp->nfsow_rwlock);
if (unlocked == 0) {
if (NFSHASONEOPENOWN(nmp))
nfsv4_relref(&owp->nfsow_rwlock);
else
nfscl_lockunlock(&owp->nfsow_rwlock);
}
nfscl_clrelease(owp->nfsow_clp);
NFSUNLOCKCLSTATE();
}
@ -696,7 +724,8 @@ nfscl_ownerrelease(struct nfsclowner *owp, __unused int error,
* Release use of an open structure under an open owner.
*/
APPLESTATIC void
nfscl_openrelease(struct nfsclopen *op, int error, int candelete)
nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
int candelete)
{
struct nfsclclient *clp;
struct nfsclowner *owp;
@ -705,7 +734,10 @@ nfscl_openrelease(struct nfsclopen *op, int error, int candelete)
return;
NFSLOCKCLSTATE();
owp = op->nfso_own;
nfscl_lockunlock(&owp->nfsow_rwlock);
if (NFSHASONEOPENOWN(nmp))
nfsv4_relref(&owp->nfsow_rwlock);
else
nfscl_lockunlock(&owp->nfsow_rwlock);
clp = owp->nfsow_clp;
if (error && candelete && op->nfso_opencnt == 0)
nfscl_freeopen(op, 0);
@ -997,7 +1029,10 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
} else {
nfscl_filllockowner(id, own, flags);
ownp = own;
nfscl_filllockowner(p->td_proc, openown, F_POSIX);
if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp))))
nfscl_filllockowner(NULL, openown, F_POSIX);
else
nfscl_filllockowner(p->td_proc, openown, F_POSIX);
openownp = openown;
}
if (!recovery) {
@ -1725,6 +1760,7 @@ nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
struct nfsclowner *owp, *nowp;
struct nfsclopen *op;
struct nfscllockowner *lp, *nlp;
struct nfscldeleg *dp;
NFSPROCLISTLOCK();
NFSLOCKCLSTATE();
@ -1738,6 +1774,20 @@ nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp)
if (nfscl_procdoesntexist(owp->nfsow_owner))
nfscl_cleanup_common(clp, owp->nfsow_owner);
}
/*
* For the single open_owner case, these lock owners need to be
* checked to see if they still exist separately.
* This is because nfscl_procdoesntexist() never returns true for
* the single open_owner so that the above doesn't ever call
* nfscl_cleanup_common().
*/
TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
if (nfscl_procdoesntexist(lp->nfsl_owner))
nfscl_cleanup_common(clp, lp->nfsl_owner);
}
}
NFSUNLOCKCLSTATE();
NFSPROCLISTUNLOCK();
}

@ -592,6 +592,12 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
}
/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
if (nmp->nm_minorvers == 0) {
argp->flags &= ~NFSMNT_ONEOPENOWN;
nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
}
/* Re-bind if rsrvd port requested and wasn't on one */
adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
&& (argp->flags & NFSMNT_RESVPORT);
@ -727,7 +733,7 @@ static const char *nfs_opts[] = { "from", "nfs_args",
"resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
"minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
"pnfs", "wcommitsize",
"pnfs", "wcommitsize", "oneopenown",
NULL };
/*
@ -962,6 +968,8 @@ nfs_mount(struct mount *mp)
args.flags |= NFSMNT_NONCONTIGWR;
if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
args.flags |= NFSMNT_PNFS;
if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
args.flags |= NFSMNT_ONEOPENOWN;
if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
if (opt == NULL) {
vfs_mount_error(mp, "illegal readdirsize");
@ -1172,8 +1180,8 @@ nfs_mount(struct mount *mp)
/*
* When doing an update, we can't change version,
* security, switch lockd strategies or change cookie
* translation
* security, switch lockd strategies, change cookie
* translation or switch oneopenown.
*/
args.flags = (args.flags &
~(NFSMNT_NFSV3 |
@ -1181,6 +1189,7 @@ nfs_mount(struct mount *mp)
NFSMNT_KERB |
NFSMNT_INTEGRITY |
NFSMNT_PRIVACY |
NFSMNT_ONEOPENOWN |
NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
(nmp->nm_flag &
(NFSMNT_NFSV3 |
@ -1188,6 +1197,7 @@ nfs_mount(struct mount *mp)
NFSMNT_KERB |
NFSMNT_INTEGRITY |
NFSMNT_PRIVACY |
NFSMNT_ONEOPENOWN |
NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
goto out;
@ -1946,6 +1956,8 @@ void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
&blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
&buf, &blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
}
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
&blen);

@ -76,7 +76,7 @@ struct nfs_args {
#define NFSMNT_MAXGRPS 0x00000020 /* set maximum grouplist size */
#define NFSMNT_INT 0x00000040 /* allow interrupts on hard mount */
#define NFSMNT_NOCONN 0x00000080 /* Don't Connect the socket */
/* 0x100 free, was NFSMNT_NQNFS */
#define NFSMNT_ONEOPENOWN 0x00000100 /* Use one OpenOwner for NFSv4.1 */
#define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */
#define NFSMNT_KERB 0x00000400 /* Use RPCSEC_GSS/Krb5 */
#define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */