Fix LORs between vn_start_write() and vn_lock() in the pNFS server.

When coding the pNFS server, I added several vn_start_write() calls done
while the vnode was locked, not realizing I had introduced LORs and
possible deadlock when an exported file system on the MDS is suspended.
This patch fixes this by removing the added vn_start_write() calls and
modifying the code so that the extant vn_start_write() call before the
NFS RPC/operation is done when needed by the pNFS server.
Flags are changed so that LayoutCommit and LayoutReturn now get a
vn_start_write() done for them.
When the pNFS server is enabled, the code now also changes the flags for
Getattr, so that the vn_start_write() is done for Getattr, since it may
need to do a vn_set_extattr(). The nfs_writerpc flag array was made global
to the NFS server and renamed nfsrv_writerpc, which is consistent naming
for globals in the NFS server.
Thanks go to kib@ for reporting that doing vn_start_write() while the vnode is
locked results in a LOR.
This patch only affects the behaviour of the pNFS server.
This commit is contained in:
Rick Macklem 2018-08-17 21:12:16 +00:00
parent 3e6e16b4da
commit 3e5ba2e187
4 changed files with 56 additions and 55 deletions

View File

@ -156,9 +156,9 @@ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = {
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */
{ 0, 1, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */
{ 0, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */

View File

@ -107,6 +107,9 @@ extern u_long sb_max_adj;
extern int newnfs_numnfsd;
extern struct proc *nfsd_master_proc;
extern time_t nfsdev_time;
extern int nfsrv_writerpc[NFS_NPROCS];
extern volatile int nfsrv_devidcnt;
extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS];
/*
* NFS server system calls
@ -527,8 +530,21 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
nfsrvd_pool->sp_minthreads = args->minthreads;
nfsrvd_pool->sp_maxthreads = args->maxthreads;
/*
* If this is a pNFS service, make Getattr do a
* vn_start_write(), so it can do a vn_set_extattr().
*/
if (nfsrv_devidcnt > 0) {
nfsrv_writerpc[NFSPROC_GETATTR] = 1;
nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 1;
}
svc_run(nfsrvd_pool);
/* Reset Getattr to not do a vn_start_write(). */
nfsrv_writerpc[NFSPROC_GETATTR] = 0;
nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 0;
if (principal[0] != '\0') {
rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);

View File

@ -128,7 +128,7 @@ static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
static int nfsrv_putfhname(fhandle_t *, char *);
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *,
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
struct vnode *, NFSPROC_T *);
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
@ -4068,21 +4068,16 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
tpf->dsf_sin.sin_port = 0;
}
error = vn_start_write(vp, &mp, V_WAIT);
if (error == 0) {
error = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
if (error == 0)
error = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
if (error == 0)
error = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
sizeof(dsattr), (char *)&dsattr, p);
vn_finished_write(mp);
if (error != 0)
printf("pNFS: pnfscreate setextattr=%d\n",
error);
} else
printf("pNFS: pnfscreate startwrite=%d\n", error);
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
sizeof(dsattr), (char *)&dsattr, p);
if (error != 0)
printf("pNFS: pnfscreate setextattr=%d\n",
error);
} else
printf("pNFS: pnfscreate=%d\n", error);
free(pf, M_TEMP);
@ -4415,6 +4410,9 @@ nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt,
tryagain:
if (error == 0) {
buflen = 1024;
if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
LK_EXCLUSIVE)
printf("nfsrv_proxyds: Readds vp exclusively locked\n");
error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
&mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
NULL, NULL);
@ -4673,6 +4671,8 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
if (fhiszero != 0)
nfsrv_pnfssetfh(
vp, pf,
devid,
fnamep,
nvp, p);
if (nvpp != NULL &&
*nvpp == NULL) {
@ -4746,21 +4746,15 @@ static int
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
{
struct pnfsdsattr dsattr;
struct mount *mp;
int error;
ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
error = vn_start_write(vp, &mp, V_WAIT);
if (error == 0) {
dsattr.dsa_filerev = nap->na_filerev;
dsattr.dsa_size = nap->na_size;
dsattr.dsa_atime = nap->na_atime;
dsattr.dsa_mtime = nap->na_mtime;
error = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
sizeof(dsattr), (char *)&dsattr, p);
vn_finished_write(mp);
}
dsattr.dsa_filerev = nap->na_filerev;
dsattr.dsa_size = nap->na_size;
dsattr.dsa_atime = nap->na_atime;
dsattr.dsa_mtime = nap->na_mtime;
error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
"pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
if (error != 0)
printf("pNFS: setextattr=%d\n", error);
return (error);
@ -5532,35 +5526,26 @@ nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
* Set the file handle to the correct one.
*/
static void
nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp,
NFSPROC_T *p)
nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
char *fnamep, struct vnode *nvp, NFSPROC_T *p)
{
struct mount *mp;
struct nfsnode *np;
int ret;
np = VTONFS(nvp);
NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
/*
* We can only do a setextattr for an exclusively
* locked vp. Instead of trying to upgrade a shared
* lock, just leave dsf_fh zeroed out and it will
* keep doing this lookup until it is done with an
* exclusively locked vp.
* We can only do a vn_set_extattr() if the vnode is exclusively
* locked and vn_start_write() has been done. If devid != NULL or
* fnamep != NULL or the vnode is shared locked, vn_start_write()
* may not have been done.
* If not done now, it will be done on a future call.
*/
if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) {
ret = vn_start_write(vp, &mp, V_WAIT);
NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n",
ret);
if (ret == 0) {
ret = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
sizeof(*pf), (char *)pf, p);
vn_finished_write(mp);
NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft "
"vn_extattr_set=%d\n", ret);
}
}
if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
LK_EXCLUSIVE)
ret = vn_extattr_set(vp, IO_NODELOCKED,
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
(char *)pf, p);
NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
}

View File

@ -361,7 +361,7 @@ static int nfsrv_nonidempotent[NFS_V3NPROCS] = {
* This static array indicates whether or not the RPC modifies the
* file system.
*/
static int nfs_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0,
int nfsrv_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
@ -517,10 +517,10 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
lktype = LK_EXCLUSIVE;
if (nd->nd_flag & ND_PUBLOOKUP)
nfsd_fhtovp(nd, &nfs_pubfh, lktype, &vp, &nes,
&mp, nfs_writerpc[nd->nd_procnum], p);
&mp, nfsrv_writerpc[nd->nd_procnum], p);
else
nfsd_fhtovp(nd, &fh, lktype, &vp, &nes,
&mp, nfs_writerpc[nd->nd_procnum], p);
&mp, nfsrv_writerpc[nd->nd_procnum], p);
if (nd->nd_repstat == NFSERR_PROGNOTV4)
goto out;
}
@ -545,7 +545,7 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], /*now*/ NULL);
nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0,
/*now*/ NULL, /*then*/ NULL);
if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0)
if (mp != NULL && nfsrv_writerpc[nd->nd_procnum] != 0)
vn_finished_write(mp);
goto out;
}
@ -576,7 +576,7 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
error = (*(nfsrv3_procs0[nd->nd_procnum]))(nd, isdgram,
vp, p, &nes);
}
if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0)
if (mp != NULL && nfsrv_writerpc[nd->nd_procnum] != 0)
vn_finished_write(mp);
nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0,