Fix two races while handling nfsuserd daemon start/stop.

A crash was reported where the nr_client field was NULL during an upcall
to the nfsuserd daemon. Since nr_client == NULL only occurs when the
nfsuserd daemon is being shut down, it appeared to be caused by a race
between doing an upcall and the daemon shutting down.
By inspection two races were identified:
1 - The nfsrv_nfsuserd variable is used to indicate whether or not the
    daemon is running. However it did not handle the intermediate phase
    where the daemon is starting or stopping.

    This was fixed by making nfsrv_nfsuserd tri-state and having the
    functions that are called during start/stop to obey the intermediate
    state.

2 - nfsrv_nfsuserd was checked to see that the daemon was running at
    the beginning of an upcall, but nothing prevented the daemon from
    being shut down while an upcall was still in progress.
    This race probably caused the crash.

    The patch fixes this by adding a count of upcalls in progress and
    having the shut down function delay until this count goes to zero
    before getting rid of nr_client and related data used by an upcall.

Tested by:	avg (Panzura QA)
Reported by:	avg
Reviewed by:	avg
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D22377
This commit is contained in:
Rick Macklem 2019-11-28 23:34:23 +00:00
parent ef4a351452
commit e1cda5eea6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=355194
4 changed files with 44 additions and 11 deletions

View File

@ -797,6 +797,9 @@ struct nfsslot {
struct mbuf *nfssl_reply;
};
/* Enumerated type for nfsuserd state. */
typedef enum { NOTRUNNING=0, STARTSTOP=1, RUNNING=2 } nfsuserd_state;
#endif /* _KERNEL */
#endif /* _NFS_NFS_H */

View File

@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
extern int nfscl_ticks;
extern int nfsrv_nfsuserd;
extern nfsuserd_state nfsrv_nfsuserd;
extern struct nfssockreq nfsrv_nfsuserdsock;
extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *,
struct thread *);
@ -774,7 +774,7 @@ nfscommon_modevent(module_t mod, int type, void *data)
break;
case MOD_UNLOAD:
if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 ||
if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != NOTRUNNING ||
nfs_numnfscbd != 0) {
error = EBUSY;
break;

View File

@ -64,7 +64,8 @@ struct timeval nfsboottime; /* Copy boottime once, so it never changes */
int nfscl_ticks;
int nfsrv_useacl = 1;
struct nfssockreq nfsrv_nfsuserdsock;
int nfsrv_nfsuserd = 0;
nfsuserd_state nfsrv_nfsuserd = NOTRUNNING;
static int nfsrv_userdupcalls = 0;
struct nfsreqhead nfsd_reqq;
uid_t nfsrv_defaultuid = UID_NOBODY;
gid_t nfsrv_defaultgid = GID_NOGROUP;
@ -3522,18 +3523,22 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p)
int error;
NFSLOCKNAMEID();
if (nfsrv_nfsuserd) {
if (nfsrv_nfsuserd != NOTRUNNING) {
NFSUNLOCKNAMEID();
error = EPERM;
goto out;
}
nfsrv_nfsuserd = 1;
NFSUNLOCKNAMEID();
nfsrv_nfsuserd = STARTSTOP;
/*
* Set up the socket record and connect.
* Set nr_client NULL before unlocking, just to ensure that no other
* process/thread/core will use a bogus old value. This could only
* occur if the use of the nameid lock to protect nfsrv_nfsuserd is
* broken.
*/
rp = &nfsrv_nfsuserdsock;
rp->nr_client = NULL;
NFSUNLOCKNAMEID();
rp->nr_sotype = SOCK_DGRAM;
rp->nr_soproto = IPPROTO_UDP;
rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST);
@ -3569,9 +3574,15 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p)
rp->nr_vers = RPCNFSUSERD_VERS;
if (error == 0)
error = newnfs_connect(NULL, rp, NFSPROCCRED(p), p, 0);
if (error) {
if (error == 0) {
NFSLOCKNAMEID();
nfsrv_nfsuserd = RUNNING;
NFSUNLOCKNAMEID();
} else {
free(rp->nr_nam, M_SONAME);
nfsrv_nfsuserd = 0;
NFSLOCKNAMEID();
nfsrv_nfsuserd = NOTRUNNING;
NFSUNLOCKNAMEID();
}
out:
NFSEXITCODE(error);
@ -3586,14 +3597,21 @@ nfsrv_nfsuserddelport(void)
{
NFSLOCKNAMEID();
if (nfsrv_nfsuserd == 0) {
if (nfsrv_nfsuserd != RUNNING) {
NFSUNLOCKNAMEID();
return;
}
nfsrv_nfsuserd = 0;
nfsrv_nfsuserd = STARTSTOP;
/* Wait for all upcalls to complete. */
while (nfsrv_userdupcalls > 0)
msleep(&nfsrv_userdupcalls, NFSNAMEIDMUTEXPTR, PVFS,
"nfsupcalls", 0);
NFSUNLOCKNAMEID();
newnfs_disconnect(&nfsrv_nfsuserdsock);
free(nfsrv_nfsuserdsock.nr_nam, M_SONAME);
NFSLOCKNAMEID();
nfsrv_nfsuserd = NOTRUNNING;
NFSUNLOCKNAMEID();
}
/*
@ -3612,12 +3630,19 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name)
int error;
NFSLOCKNAMEID();
if (nfsrv_nfsuserd == 0) {
if (nfsrv_nfsuserd != RUNNING) {
NFSUNLOCKNAMEID();
error = EPERM;
goto out;
}
/*
* Maintain a count of upcalls in progress, so that nfsrv_X()
* can wait until no upcalls are in progress.
*/
nfsrv_userdupcalls++;
NFSUNLOCKNAMEID();
KASSERT(nfsrv_userdupcalls > 0,
("nfsrv_getuser: non-positive upcalls"));
nd = &nfsd;
cred = newnfs_getcred();
nd->nd_flag = ND_GSSINITREPLY;
@ -3636,6 +3661,10 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name)
}
error = newnfs_request(nd, NULL, NULL, &nfsrv_nfsuserdsock, NULL, NULL,
cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL);
NFSLOCKNAMEID();
if (--nfsrv_userdupcalls == 0 && nfsrv_nfsuserd == STARTSTOP)
wakeup(&nfsrv_userdupcalls);
NFSUNLOCKNAMEID();
NFSFREECRED(cred);
if (!error) {
mbuf_freem(nd->nd_mrep);

View File

@ -669,6 +669,7 @@ void nfsrvd_rcv(struct socket *, void *, int);
#define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex)
#define NFSUNLOCKSOCK() mtx_unlock(&nfs_slock_mutex)
#define NFSNAMEIDMUTEX extern struct mtx nfs_nameid_mutex
#define NFSNAMEIDMUTEXPTR (&nfs_nameid_mutex)
#define NFSLOCKNAMEID() mtx_lock(&nfs_nameid_mutex)
#define NFSUNLOCKNAMEID() mtx_unlock(&nfs_nameid_mutex)
#define NFSNAMEIDREQUIRED() mtx_assert(&nfs_nameid_mutex, MA_OWNED)