Do not translate the IN_ACCESS inode flag into the IN_MODIFIED while filesystem

is suspending/suspended. Doing so may result in deadlock. Instead, set the
(new) IN_LAZYACCESS flag, that becomes IN_MODIFIED when suspend is lifted.

Change the locking protocol in order to set the IN_ACCESS and timestamps
without upgrading shared vnode lock to exclusive (see comments in the
inode.h). Before that, inode was modified while holding only shared
lock.

Tested by:	Peter Holm
Reviewed by:	tegge, bde
Approved by:	pjd (mentor)
MFC after:	3 weeks
This commit is contained in:
Konstantin Belousov 2006-10-10 09:20:54 +00:00
parent f0d43ae993
commit ec7a247a24
5 changed files with 89 additions and 34 deletions

View File

@ -66,9 +66,11 @@ static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
* IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
* to disk if the IN_MODIFIED flag is set (it may be set initially, or by
* the timestamp update). The IN_LAZYMOD flag is set to force a write
* later if not now. If we write now, then clear both IN_MODIFIED and
* IN_LAZYMOD to reflect the presumably successful write, and if waitfor is
* set, then wait for the write to complete.
* later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
* is currently being suspended (or is suspended) and vnode has been accessed.
* If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
* reflect the presumably successful write, and if waitfor is set, then wait
* for the write to complete.
*/
int
ffs_update(vp, waitfor)
@ -80,12 +82,12 @@ ffs_update(vp, waitfor)
struct inode *ip;
int error;
ASSERT_VOP_LOCKED(vp, "ffs_update");
ASSERT_VOP_ELOCKED(vp, "ffs_update");
ufs_itimes(vp);
ip = VTOI(vp);
if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
return (0);
ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED);
ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
fs = ip->i_fs;
if (fs->fs_ronly)
return (0);

View File

@ -2309,15 +2309,16 @@ readblock(vp, bp, lbn)
return (bp->b_error);
}
/*
* Process file deletes that were deferred by ufs_inactive() due to
* the file system being suspended.
* the file system being suspended. Transfer IN_LAZYACCESS into
* IN_MODIFIED for vnodes that were accessed during suspension.
*/
static void
process_deferred_inactive(struct mount *mp)
{
struct vnode *vp, *mvp;
struct inode *ip;
struct thread *td;
int error;
@ -2327,9 +2328,15 @@ process_deferred_inactive(struct mount *mp)
loop:
MNT_VNODE_FOREACH(vp, mp, mvp) {
VI_LOCK(vp);
if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT ||
vp->v_usecount > 0 ||
vp->v_type == VNON) {
/*
* IN_LAZYACCESS is checked here without holding any
* vnode lock, but this flag is set only while holding
* vnode interlock.
*/
if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED) != 0 ||
((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 &&
((vp->v_iflag & VI_OWEINACT) == 0 ||
vp->v_usecount > 0))) {
VI_UNLOCK(vp);
continue;
}
@ -2344,8 +2351,13 @@ process_deferred_inactive(struct mount *mp)
MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
goto loop;
}
ip = VTOI(vp);
if ((ip->i_flag & IN_LAZYACCESS) != 0) {
ip->i_flag &= ~IN_LAZYACCESS;
ip->i_flag |= IN_MODIFIED;
}
VI_LOCK(vp);
if ((vp->v_iflag & VI_OWEINACT) == 0) {
if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) {
VI_UNLOCK(vp);
VOP_UNLOCK(vp, 0, td);
vdrop(vp);

View File

@ -593,8 +593,11 @@ ffs_read(ap)
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
VI_UNLOCK(vp);
}
return (error);
}
@ -989,8 +992,11 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
}
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
VI_LOCK(vp);
ip->i_flag |= IN_ACCESS;
VI_UNLOCK(vp);
}
return (error);
}

View File

@ -55,6 +55,13 @@
* is the permanent meta-data associated with the file which is read in
* from the permanent dinode from long term storage when the file becomes
* active, and is put back when the file is no longer being used.
*
* An inode may only be changed while holding either the exclusive
* vnode lock or the shared vnode lock and the vnode interlock. We use
* the latter only for "read" and "get" operations that require
* changing i_flag, or a timestamp. This locking protocol allows executing
* those operations without having to upgrade the vnode lock from shared to
* exclusive.
*/
struct inode {
TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */
@ -119,6 +126,8 @@ struct inode {
#define IN_RENAME 0x0010 /* Inode is being renamed. */
#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */
#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */
#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the
suspension finished */
#define i_devvp i_ump->um_devvp
#define i_umbufobj i_ump->um_bo

View File

@ -128,31 +128,49 @@ ufs_itimes(vp)
{
struct inode *ip;
struct timespec ts;
int mnt_locked;
ip = VTOI(vp);
mnt_locked = 0;
if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
VI_LOCK(vp);
goto out;
}
MNT_ILOCK(vp->v_mount); /* For reading of mnt_kern_flags. */
mnt_locked = 1;
VI_LOCK(vp);
if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
return;
goto out_unl;
if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
ip->i_flag |= IN_LAZYMOD;
else
else if (((vp->v_mount->mnt_kern_flag &
(MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
(ip->i_flag & (IN_CHANGE | IN_UPDATE)))
ip->i_flag |= IN_MODIFIED;
if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
vfs_timestamp(&ts);
if (ip->i_flag & IN_ACCESS) {
DIP_SET(ip, i_atime, ts.tv_sec);
DIP_SET(ip, i_atimensec, ts.tv_nsec);
}
if (ip->i_flag & IN_UPDATE) {
DIP_SET(ip, i_mtime, ts.tv_sec);
DIP_SET(ip, i_mtimensec, ts.tv_nsec);
ip->i_modrev++;
}
if (ip->i_flag & IN_CHANGE) {
DIP_SET(ip, i_ctime, ts.tv_sec);
DIP_SET(ip, i_ctimensec, ts.tv_nsec);
}
else if (ip->i_flag & IN_ACCESS)
ip->i_flag |= IN_LAZYACCESS;
vfs_timestamp(&ts);
if (ip->i_flag & IN_ACCESS) {
DIP_SET(ip, i_atime, ts.tv_sec);
DIP_SET(ip, i_atimensec, ts.tv_nsec);
}
if (ip->i_flag & IN_UPDATE) {
DIP_SET(ip, i_mtime, ts.tv_sec);
DIP_SET(ip, i_mtimensec, ts.tv_nsec);
ip->i_modrev++;
}
if (ip->i_flag & IN_CHANGE) {
DIP_SET(ip, i_ctime, ts.tv_sec);
DIP_SET(ip, i_ctimensec, ts.tv_nsec);
}
out:
ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
out_unl:
VI_UNLOCK(vp);
if (mnt_locked)
MNT_IUNLOCK(vp->v_mount);
}
/*
@ -266,11 +284,13 @@ ufs_close(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
int usecount;
VI_LOCK(vp);
if (vp->v_usecount > 1)
ufs_itimes(vp);
usecount = vp->v_usecount;
VI_UNLOCK(vp);
if (usecount > 1)
ufs_itimes(vp);
return (0);
}
@ -378,8 +398,10 @@ ufs_getattr(ap)
if (ip->i_ump->um_fstype == UFS1) {
vap->va_rdev = ip->i_din1->di_rdev;
vap->va_size = ip->i_din1->di_size;
VI_LOCK(vp);
vap->va_atime.tv_sec = ip->i_din1->di_atime;
vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
VI_UNLOCK(vp);
vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
@ -390,8 +412,10 @@ ufs_getattr(ap)
} else {
vap->va_rdev = ip->i_din2->di_rdev;
vap->va_size = ip->i_din2->di_size;
VI_LOCK(vp);
vap->va_atime.tv_sec = ip->i_din2->di_atime;
vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
VI_UNLOCK(vp);
vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
@ -1992,11 +2016,13 @@ ufsfifo_close(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
int usecount;
VI_LOCK(vp);
if (vp->v_usecount > 1)
ufs_itimes(vp);
usecount = vp->v_usecount;
VI_UNLOCK(vp);
if (usecount > 1)
ufs_itimes(vp);
return (fifo_specops.vop_close(ap));
}