Serialize NFS vinvalbuf operations by acquiring, or upgrading to, the
vnode EXCLUSIVE lock. This prevents threads from adding pages to the
vnode while an invalidation is in progress, closing potential races.
In the bioread() path, callers acquire the SHARED vnode lock, so while
an invalidation was in progress it was possible to fault new pages in
on the vnode, causing the invalidation to take a long time or to fail.
We saw these races at Yahoo! with very large files and heavy concurrent
access. Forcing an upgrade to the EXCLUSIVE lock before doing the
invalidation closes all of these races.

Submitted by:	Mohan Srinivasan mohans at yahoo-inc dot com
commit d54d263a79
parent 568b7ee1b2
Paul Saab, 2004-12-06 18:52:28 +00:00

4 files changed, 24 insertions(+), 31 deletions(-)

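In outline, the change makes nfs_vinvalbuf() responsible for its own
locking: remember how the caller had the vnode locked, force the lock
exclusive for the duration of the invalidation, then restore the
caller's state. A condensed sketch of that pattern, pulled out of its
NFS context (the wrapper function is hypothetical; the vn_lock(9),
VOP_ISLOCKED(9), and VOP_UNLOCK(9) calls use the same era-specific
signatures as the diff below):

	/*
	 * Sketch only: the save/upgrade/restore lock pattern this commit
	 * introduces. The function name and the bare vinvalbuf() call are
	 * illustrative, not part of the commit.
	 */
	static int
	invalbuf_serialized(struct vnode *vp, int flags, struct ucred *cred,
	    struct thread *td)
	{
		int error, old_lock;

		/* Remember the caller's lock state so it can be restored. */
		old_lock = VOP_ISLOCKED(vp, td);
		if (old_lock != LK_EXCLUSIVE) {
			if (old_lock == LK_SHARED)
				/* Upgrade in place; this may sleep. */
				vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
			else
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		}

		/*
		 * With the vnode held EXCLUSIVE, no shared-lock reader can
		 * fault fresh pages in while the invalidation runs.
		 */
		error = vinvalbuf(vp, flags, cred, td, 0, 0);

		/* Put the lock back the way the caller had it. */
		if (old_lock != LK_EXCLUSIVE) {
			if (old_lock == LK_SHARED)
				vn_lock(vp, LK_DOWNGRADE, td);
			else
				VOP_UNLOCK(vp, 0, td);
		}
		return (error);
	}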
@@ -1066,6 +1066,7 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred,
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, slpflag, slptimeo;
+	int old_lock = 0;
 
 	ASSERT_VOP_LOCKED(vp, "nfs_vinvalbuf");
@@ -1086,40 +1087,36 @@ nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred,
 		slpflag = 0;
 		slptimeo = 0;
 	}
-	/*
-	 * First wait for any other process doing a flush to complete.
-	 */
-	while (np->n_flag & NFLUSHINPROG) {
-		np->n_flag |= NFLUSHWANT;
-		error = tsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
-			slptimeo);
-		if (error && intrflg &&
-		    nfs_sigintr(nmp, NULL, td))
-			return (EINTR);
-	}
+	if ((old_lock = VOP_ISLOCKED(vp, td)) != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Upgrade to exclusive lock, this might block */
+			vn_lock(vp, LK_UPGRADE | LK_RETRY, td);
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+		}
+	}
 	/*
 	 * Now, flush as required.
 	 */
-	np->n_flag |= NFLUSHINPROG;
 	error = vinvalbuf(vp, flags, cred, td, slpflag, 0);
 	while (error) {
-		if (intrflg && (error = nfs_sigintr(nmp, NULL, td))) {
-			np->n_flag &= ~NFLUSHINPROG;
-			if (np->n_flag & NFLUSHWANT) {
-				np->n_flag &= ~NFLUSHWANT;
-				wakeup(&np->n_flag);
-			}
-			return (error);
-		}
+		if (intrflg && (error = nfs_sigintr(nmp, NULL, td)))
+			goto out;
 		error = vinvalbuf(vp, flags, cred, td, 0, slptimeo);
 	}
-	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
-	if (np->n_flag & NFLUSHWANT) {
-		np->n_flag &= ~NFLUSHWANT;
-		wakeup(&np->n_flag);
-	}
-	return (0);
+	np->n_flag &= ~NMODIFIED;
+out:
+	if (old_lock != LK_EXCLUSIVE) {
+		if (old_lock == LK_SHARED) {
+			/* Downgrade from exclusive lock, this might block */
+			vn_lock(vp, LK_DOWNGRADE, td);
+		} else {
+			VOP_UNLOCK(vp, 0, td);
+		}
+	}
+	return error;
 }
 /*

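Caller-side view, for the read path the commit message describes:
readers hold only the SHARED vnode lock, which is exactly what used to
let pages be faulted in mid-invalidation. A hypothetical caller under
the new scheme (the NMODIFIED check mirrors the read path; the code
around the nfs_vinvalbuf() call is illustrative, not from this diff):

	/*
	 * Hypothetical read-path caller: only a SHARED lock is held here.
	 * nfs_vinvalbuf() now upgrades to EXCLUSIVE internally and
	 * downgrades back before returning, so the invalidation cannot
	 * race with other shared-lock readers.
	 */
	vn_lock(vp, LK_SHARED | LK_RETRY, td);
	if (np->n_flag & NMODIFIED)
		error = nfs_vinvalbuf(vp, V_SAVE, cred, td, 1);
	VOP_UNLOCK(vp, 0, td);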
@@ -315,7 +315,7 @@ nfs_inactive(struct vop_inactive_args *ap)
 		vrele(sp->s_dvp);
 		FREE((caddr_t)sp, M_NFSREQ);
 	}
-	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
+	np->n_flag &= NMODIFIED;
 	VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
 	return (0);
 }

@@ -504,9 +504,7 @@ nfs_close(struct vop_close_args *ap)
 			error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_td, cm);
 			/* np->n_flag &= ~NMODIFIED; */
 		} else {
-			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_td);
 			error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_td, 1);
-			VOP_UNLOCK(vp, 0, ap->a_td);
 		}
 		np->n_attrstamp = 0;
 	}

@@ -139,8 +139,6 @@ struct nfsnode {
 /*
  * Flags for n_flag
  */
-#define	NFLUSHWANT	0x0001	/* Want wakeup from a flush in prog. */
-#define	NFLUSHINPROG	0x0002	/* Avoid multiple calls to vinvalbuf() */
 #define	NMODIFIED	0x0004	/* Might have a modified buffer in bio */
 #define	NWRITEERR	0x0008	/* Flag write errors so close will know */
 /* 0x20, 0x40, 0x80 free */
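
For reference, the serialization those two flags provided, reassembled
in one piece from the lines removed above: a hand-rolled gate built
from n_flag bits and tsleep(9)/wakeup(9). The exclusive vnode lock now
provides the same mutual exclusion, which is why both flags can go:

	/* Retired protocol, reconstructed from the removed code above. */
	while (np->n_flag & NFLUSHINPROG) {	/* someone else flushing? */
		np->n_flag |= NFLUSHWANT;	/* ask for a wakeup */
		(void) tsleep(&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo);
	}
	np->n_flag |= NFLUSHINPROG;		/* take the gate */
	/* ... invalidate buffers ... */
	np->n_flag &= ~NFLUSHINPROG;		/* release the gate */
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);		/* wake any waiters */
	}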