Serialize NFS vinvalbuf operations by acquiring/upgrading to the vnode
EXCLUSIVE lock. This prevents threads from adding pages to the vnode while
an invalidation is in progress, closing potential races. In the bioread()
path, callers acquire the SHARED vnode lock, so while an invalidate was in
progress it was possible to fault new pages onto the vnode, causing the
invalidation to take a long time or fail. We saw these races at Yahoo! with
very large files and heavy concurrent access. Forcing an upgrade to the
EXCLUSIVE lock before doing the invalidation closes all of these races.

Submitted by: Mohan Srinivasan mohans at yahoo-inc dot com
commit d54d263a79
parent 568b7ee1b2
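In outline, the change serializes invalidation on the vnode lock itself: the lock is upgraded (or acquired) EXCLUSIVE before the buffers are flushed, and the caller's previous lock state is restored afterward; the diff below also drops the old NFLUSHINPROG/NFLUSHWANT sleep/wakeup protocol in favor of this. The following is a minimal sketch of that upgrade/downgrade pattern only, assuming the pre-8.x locking API used in the diff (vn_lock(), VOP_UNLOCK() and VOP_ISLOCKED() taking a thread pointer); the helper names are hypothetical and are not part of the commit.

/* Minimal sketch only -- helper names are hypothetical, not committed code. */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/lockmgr.h>
#include <sys/vnode.h>

/* Raise the vnode lock to exclusive, remembering what the caller held. */
static int
example_upgrade_vnlock(struct vnode *vp, struct thread *td)
{
	int old_lock = VOP_ISLOCKED(vp, td);

	if (old_lock != LK_EXCLUSIVE) {
		if (old_lock == LK_SHARED) {
			/* Upgrade to exclusive lock, this might block */
			vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
		} else {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		}
	}
	return (old_lock);
}

/* Put the vnode lock back the way the caller had it. */
static void
example_downgrade_vnlock(struct vnode *vp, struct thread *td, int old_lock)
{
	if (old_lock != LK_EXCLUSIVE) {
		if (old_lock == LK_SHARED) {
			/* Downgrade from exclusive lock, this might block */
			vn_lock(vp, LK_DOWNGRADE, td);
		} else {
			VOP_UNLOCK(vp, 0, td);
		}
	}
}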
sys/nfsclient
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -1066,6 +1066,7 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred,
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, slpflag, slptimeo;
+	int old_lock = 0;
 
 	ASSERT_VOP_LOCKED(vp, "nfs_vinvalbuf");
 
@@ -1086,40 +1087,36 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred,
 		slpflag = 0;
 		slptimeo = 0;
 	}
-	/*
-	 * First wait for any other process doing a flush to complete.
-	 */
-	while (np->n_flag & NFLUSHINPROG) {
-		np->n_flag |= NFLUSHWANT;
-		error = tsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
-			slptimeo);
-		if (error && intrflg &&
-		    nfs_sigintr(nmp, NULL, td))
-			return (EINTR);
-	}
+
+	if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Upgrade to exclusive lock, this might block */
+			vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+		}
+	}
 
 	/*
 	 * Now, flush as required.
 	 */
-	np->n_flag |= NFLUSHINPROG;
 	error = vinvalbuf(vp, flags, cred, td, slpflag, 0);
 	while (error) {
-		if (intrflg && (error = nfs_sigintr(nmp, NULL, td))) {
-			np->n_flag &= ~NFLUSHINPROG;
-			if (np->n_flag & NFLUSHWANT) {
-				np->n_flag &= ~NFLUSHWANT;
-				wakeup(&np->n_flag);
-			}
-			return (error);
-		}
+		if (intrflg && (error = nfs_sigintr(nmp, NULL, td)))
+			goto out;
 		error = vinvalbuf(vp, flags, cred, td, 0, slptimeo);
 	}
-	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
-	if (np->n_flag & NFLUSHWANT) {
-		np->n_flag &= ~NFLUSHWANT;
-		wakeup(&np->n_flag);
-	}
-	return (0);
+	np->n_flag &= ~NMODIFIED;
+out:
+	if (old_lock != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Downgrade from exclusive lock, this might block */
+			vn_lock(vp, LK_DOWNGRADE, td);
+		} else {
+			VOP_UNLOCK(vp, 0, td);
+		}
+	}
+	return error;
 }
 
 /*
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -315,7 +315,7 @@ nfs_inactive(struct vop_inactive_args *ap)
 		vrele(sp->s_dvp);
 		FREE((caddr_t)sp, M_NFSREQ);
 	}
-	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
+	np->n_flag &= NMODIFIED;
 	VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
 	return (0);
 }
@@ -504,9 +504,7 @@ nfs_close(struct vop_close_args *ap)
 			error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_td, cm);
 			/* np->n_flag &= ~NMODIFIED; */
 		} else {
-			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
 			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
-			VOP_UNLOCK(vp, 0, ap->a_td);
 		}
 		np->n_attrstamp = 0;
 	}
--- a/sys/nfsclient/nfsnode.h
+++ b/sys/nfsclient/nfsnode.h
@@ -139,8 +139,6 @@ struct nfsnode {
 /*
  * Flags for n_flag
  */
-#define	NFLUSHWANT	0x0001	/* Want wakeup from a flush in prog. */
-#define	NFLUSHINPROG	0x0002	/* Avoid multiple calls to vinvalbuf() */
 #define	NMODIFIED	0x0004	/* Might have a modified buffer in bio */
 #define	NWRITEERR	0x0008	/* Flag write errors so close will know */
 /* 0x20, 0x40, 0x80 free */
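Because nfs_vinvalbuf() now performs the lock transition itself (note the ASSERT_VOP_LOCKED on entry and the restore at out:), callers such as nfs_close() no longer wrap the call in vn_lock()/VOP_UNLOCK(). A hypothetical caller holding the SHARED lock, as the bioread() path does, is sketched below; the function name and standalone setting are illustrative only and not part of the commit.

/* Hypothetical caller sketch -- not part of the commit. */
static int
example_caller(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int error;

	vn_lock(vp, LK_SHARED | LK_RETRY, td);
	/*
	 * nfs_vinvalbuf() upgrades to LK_EXCLUSIVE internally and
	 * returns with the caller's shared lock restored.
	 */
	error = nfs_vinvalbuf(vp, V_SAVE, cred, td, 1);
	VOP_UNLOCK(vp, 0, td);
	return (error);
}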