Fix LORs between vn_start_write() and vn_lock() in the pNFS server.
When coding the pNFS server, I added several vn_start_write() calls done while the vnode was locked, not realizing I had introduced LORs and possible deadlock when an exported file system on the MDS is suspended. This patch fixes this by removing the added vn_start_write() calls and modifying the code so that the extant vn_start_write() call before the NFS RPC/operation is done when needed by the pNFS server. Flags are changed so that LayoutCommit and LayoutReturn now get a vn_start_write() done for them. When the pNFS server is enabled, the code now also changes the flags for Getattr, so that the vn_start_write() is done for Getattr, since it may need to do a vn_set_extattr(). The nfs_writerpc flag array was made global to the NFS server and renamed nfsrv_writerpc, which is consistent naming for globals in the NFS server. Thanks go to kib@ for reporting that doing vn_start_write() while the vnode is locked results in a LOR. This patch only affects the behaviour of the pNFS server.
This commit is contained in:
parent
3e6e16b4da
commit
3e5ba2e187
@ -156,9 +156,9 @@ struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS] = {
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Dir Deleg */
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device Info */
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Get Device List */
|
||||
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */
|
||||
{ 0, 1, 0, 1, LK_EXCLUSIVE, 1, 1 }, /* Layout Commit */
|
||||
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Layout Get */
|
||||
{ 0, 1, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */
|
||||
{ 0, 1, 0, 1, LK_EXCLUSIVE, 1, 0 }, /* Layout Return */
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Secinfo No name */
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 0 }, /* Sequence */
|
||||
{ 0, 0, 0, 0, LK_EXCLUSIVE, 1, 1 }, /* Set SSV */
|
||||
|
@ -107,6 +107,9 @@ extern u_long sb_max_adj;
|
||||
extern int newnfs_numnfsd;
|
||||
extern struct proc *nfsd_master_proc;
|
||||
extern time_t nfsdev_time;
|
||||
extern int nfsrv_writerpc[NFS_NPROCS];
|
||||
extern volatile int nfsrv_devidcnt;
|
||||
extern struct nfsv4_opflag nfsv4_opflag[NFSV41_NOPS];
|
||||
|
||||
/*
|
||||
* NFS server system calls
|
||||
@ -527,8 +530,21 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args)
|
||||
nfsrvd_pool->sp_minthreads = args->minthreads;
|
||||
nfsrvd_pool->sp_maxthreads = args->maxthreads;
|
||||
|
||||
/*
|
||||
* If this is a pNFS service, make Getattr do a
|
||||
* vn_start_write(), so it can do a vn_set_extattr().
|
||||
*/
|
||||
if (nfsrv_devidcnt > 0) {
|
||||
nfsrv_writerpc[NFSPROC_GETATTR] = 1;
|
||||
nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 1;
|
||||
}
|
||||
|
||||
svc_run(nfsrvd_pool);
|
||||
|
||||
/* Reset Getattr to not do a vn_start_write(). */
|
||||
nfsrv_writerpc[NFSPROC_GETATTR] = 0;
|
||||
nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 0;
|
||||
|
||||
if (principal[0] != '\0') {
|
||||
rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER2);
|
||||
rpc_gss_clear_svc_name_call(NFS_PROG, NFS_VER3);
|
||||
|
@ -128,7 +128,7 @@ static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
|
||||
static int nfsrv_putfhname(fhandle_t *, char *);
|
||||
static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
|
||||
struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
|
||||
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *,
|
||||
static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
|
||||
struct vnode *, NFSPROC_T *);
|
||||
static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
|
||||
static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
|
||||
@ -4068,21 +4068,16 @@ nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
|
||||
tpf->dsf_sin.sin_port = 0;
|
||||
}
|
||||
|
||||
error = vn_start_write(vp, &mp, V_WAIT);
|
||||
if (error == 0) {
|
||||
error = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
|
||||
sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
|
||||
if (error == 0)
|
||||
error = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
|
||||
sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
|
||||
if (error == 0)
|
||||
error = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
|
||||
sizeof(dsattr), (char *)&dsattr, p);
|
||||
vn_finished_write(mp);
|
||||
if (error != 0)
|
||||
printf("pNFS: pnfscreate setextattr=%d\n",
|
||||
error);
|
||||
} else
|
||||
printf("pNFS: pnfscreate startwrite=%d\n", error);
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
|
||||
sizeof(dsattr), (char *)&dsattr, p);
|
||||
if (error != 0)
|
||||
printf("pNFS: pnfscreate setextattr=%d\n",
|
||||
error);
|
||||
} else
|
||||
printf("pNFS: pnfscreate=%d\n", error);
|
||||
free(pf, M_TEMP);
|
||||
@ -4415,6 +4410,9 @@ nfsrv_proxyds(struct nfsrv_descript *nd, struct vnode *vp, off_t off, int cnt,
|
||||
tryagain:
|
||||
if (error == 0) {
|
||||
buflen = 1024;
|
||||
if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
|
||||
LK_EXCLUSIVE)
|
||||
printf("nfsrv_proxyds: Readds vp exclusively locked\n");
|
||||
error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
|
||||
&mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL);
|
||||
@ -4673,6 +4671,8 @@ nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
|
||||
if (fhiszero != 0)
|
||||
nfsrv_pnfssetfh(
|
||||
vp, pf,
|
||||
devid,
|
||||
fnamep,
|
||||
nvp, p);
|
||||
if (nvpp != NULL &&
|
||||
*nvpp == NULL) {
|
||||
@ -4746,21 +4746,15 @@ static int
|
||||
nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
|
||||
{
|
||||
struct pnfsdsattr dsattr;
|
||||
struct mount *mp;
|
||||
int error;
|
||||
|
||||
ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
|
||||
error = vn_start_write(vp, &mp, V_WAIT);
|
||||
if (error == 0) {
|
||||
dsattr.dsa_filerev = nap->na_filerev;
|
||||
dsattr.dsa_size = nap->na_size;
|
||||
dsattr.dsa_atime = nap->na_atime;
|
||||
dsattr.dsa_mtime = nap->na_mtime;
|
||||
error = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
|
||||
sizeof(dsattr), (char *)&dsattr, p);
|
||||
vn_finished_write(mp);
|
||||
}
|
||||
dsattr.dsa_filerev = nap->na_filerev;
|
||||
dsattr.dsa_size = nap->na_size;
|
||||
dsattr.dsa_atime = nap->na_atime;
|
||||
dsattr.dsa_mtime = nap->na_mtime;
|
||||
error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
|
||||
"pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
|
||||
if (error != 0)
|
||||
printf("pNFS: setextattr=%d\n", error);
|
||||
return (error);
|
||||
@ -5532,35 +5526,26 @@ nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
|
||||
* Set the file handle to the correct one.
|
||||
*/
|
||||
static void
|
||||
nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, struct vnode *nvp,
|
||||
NFSPROC_T *p)
|
||||
nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
|
||||
char *fnamep, struct vnode *nvp, NFSPROC_T *p)
|
||||
{
|
||||
struct mount *mp;
|
||||
struct nfsnode *np;
|
||||
int ret;
|
||||
|
||||
np = VTONFS(nvp);
|
||||
NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
|
||||
/*
|
||||
* We can only do a setextattr for an exclusively
|
||||
* locked vp. Instead of trying to upgrade a shared
|
||||
* lock, just leave dsf_fh zeroed out and it will
|
||||
* keep doing this lookup until it is done with an
|
||||
* exclusively locked vp.
|
||||
* We can only do a vn_set_extattr() if the vnode is exclusively
|
||||
* locked and vn_start_write() has been done. If devid != NULL or
|
||||
* fnamep != NULL or the vnode is shared locked, vn_start_write()
|
||||
* may not have been done.
|
||||
* If not done now, it will be done on a future call.
|
||||
*/
|
||||
if (NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) {
|
||||
ret = vn_start_write(vp, &mp, V_WAIT);
|
||||
NFSD_DEBUG(4, "nfsrv_pnfssetfh: vn_start_write=%d\n",
|
||||
ret);
|
||||
if (ret == 0) {
|
||||
ret = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
|
||||
sizeof(*pf), (char *)pf, p);
|
||||
vn_finished_write(mp);
|
||||
NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft "
|
||||
"vn_extattr_set=%d\n", ret);
|
||||
}
|
||||
}
|
||||
if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
|
||||
LK_EXCLUSIVE)
|
||||
ret = vn_extattr_set(vp, IO_NODELOCKED,
|
||||
EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
|
||||
(char *)pf, p);
|
||||
NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
|
||||
}
|
||||
|
||||
|
@ -361,7 +361,7 @@ static int nfsrv_nonidempotent[NFS_V3NPROCS] = {
|
||||
* This static array indicates whether or not the RPC modifies the
|
||||
* file system.
|
||||
*/
|
||||
static int nfs_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0,
|
||||
int nfsrv_writerpc[NFS_NPROCS] = { 0, 0, 1, 0, 0, 0, 0,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
|
||||
|
||||
@ -517,10 +517,10 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
|
||||
lktype = LK_EXCLUSIVE;
|
||||
if (nd->nd_flag & ND_PUBLOOKUP)
|
||||
nfsd_fhtovp(nd, &nfs_pubfh, lktype, &vp, &nes,
|
||||
&mp, nfs_writerpc[nd->nd_procnum], p);
|
||||
&mp, nfsrv_writerpc[nd->nd_procnum], p);
|
||||
else
|
||||
nfsd_fhtovp(nd, &fh, lktype, &vp, &nes,
|
||||
&mp, nfs_writerpc[nd->nd_procnum], p);
|
||||
&mp, nfsrv_writerpc[nd->nd_procnum], p);
|
||||
if (nd->nd_repstat == NFSERR_PROGNOTV4)
|
||||
goto out;
|
||||
}
|
||||
@ -545,7 +545,7 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
|
||||
nfsrvd_statstart(nfsv3to4op[nd->nd_procnum], /*now*/ NULL);
|
||||
nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0,
|
||||
/*now*/ NULL, /*then*/ NULL);
|
||||
if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0)
|
||||
if (mp != NULL && nfsrv_writerpc[nd->nd_procnum] != 0)
|
||||
vn_finished_write(mp);
|
||||
goto out;
|
||||
}
|
||||
@ -576,7 +576,7 @@ nfsrvd_dorpc(struct nfsrv_descript *nd, int isdgram, u_char *tag, int taglen,
|
||||
error = (*(nfsrv3_procs0[nd->nd_procnum]))(nd, isdgram,
|
||||
vp, p, &nes);
|
||||
}
|
||||
if (mp != NULL && nfs_writerpc[nd->nd_procnum] != 0)
|
||||
if (mp != NULL && nfsrv_writerpc[nd->nd_procnum] != 0)
|
||||
vn_finished_write(mp);
|
||||
|
||||
nfsrvd_statend(nfsv3to4op[nd->nd_procnum], /*bytes*/ 0,
|
||||
|
Loading…
Reference in New Issue
Block a user