From ec7a247a249800a16824f44d4043f1131d667e19 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Tue, 10 Oct 2006 09:20:54 +0000 Subject: [PATCH] Do not translate the IN_ACCESS inode flag into the IN_MODIFIED while filesystem is suspending/suspended. Doing so may result in deadlock. Instead, set the (new) IN_LAZYACCESS flag, that becomes IN_MODIFIED when suspend is lifted. Change the locking protocol in order to set the IN_ACCESS and timestamps without upgrading shared vnode lock to exclusive (see comments in the inode.h). Before that, inode was modified while holding only shared lock. Tested by: Peter Holm Reviewed by: tegge, bde Approved by: pjd (mentor) MFC after: 3 weeks --- sys/ufs/ffs/ffs_inode.c | 12 ++++--- sys/ufs/ffs/ffs_snapshot.c | 24 ++++++++++---- sys/ufs/ffs/ffs_vnops.c | 10 ++++-- sys/ufs/ufs/inode.h | 9 +++++ sys/ufs/ufs/ufs_vnops.c | 68 ++++++++++++++++++++++++++------------ 5 files changed, 89 insertions(+), 34 deletions(-) diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 33b65bf22604..ad6bff918982 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -66,9 +66,11 @@ static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode * to disk if the IN_MODIFIED flag is set (it may be set initially, or by * the timestamp update). The IN_LAZYMOD flag is set to force a write - * later if not now. If we write now, then clear both IN_MODIFIED and - * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is - * set, then wait for the write to complete. + * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs + * is currently being suspended (or is suspended) and vnode has been accessed. + * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to + * reflect the presumably successful write, and if waitfor is set, then wait + * for the write to complete. */ int ffs_update(vp, waitfor) @@ -80,12 +82,12 @@ ffs_update(vp, waitfor) struct inode *ip; int error; - ASSERT_VOP_LOCKED(vp, "ffs_update"); + ASSERT_VOP_ELOCKED(vp, "ffs_update"); ufs_itimes(vp); ip = VTOI(vp); if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) return (0); - ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); fs = ip->i_fs; if (fs->fs_ronly) return (0); diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 473dbfdf338d..6ab9af25bcde 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -2309,15 +2309,16 @@ readblock(vp, bp, lbn) return (bp->b_error); } - /* * Process file deletes that were deferred by ufs_inactive() due to - * the file system being suspended. + * the file system being suspended. Transfer IN_LAZYACCESS into + * IN_MODIFIED for vnodes that were accessed during suspension. */ static void process_deferred_inactive(struct mount *mp) { struct vnode *vp, *mvp; + struct inode *ip; struct thread *td; int error; @@ -2327,9 +2328,15 @@ process_deferred_inactive(struct mount *mp) loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); - if ((vp->v_iflag & (VI_DOOMED | VI_OWEINACT)) != VI_OWEINACT || - vp->v_usecount > 0 || - vp->v_type == VNON) { + /* + * IN_LAZYACCESS is checked here without holding any + * vnode lock, but this flag is set only while holding + * vnode interlock. + */ + if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED) != 0 || + ((VTOI(vp)->i_flag & IN_LAZYACCESS) == 0 && + ((vp->v_iflag & VI_OWEINACT) == 0 || + vp->v_usecount > 0))) { VI_UNLOCK(vp); continue; } @@ -2344,8 +2351,13 @@ process_deferred_inactive(struct mount *mp) MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); goto loop; } + ip = VTOI(vp); + if ((ip->i_flag & IN_LAZYACCESS) != 0) { + ip->i_flag &= ~IN_LAZYACCESS; + ip->i_flag |= IN_MODIFIED; + } VI_LOCK(vp); - if ((vp->v_iflag & VI_OWEINACT) == 0) { + if ((vp->v_iflag & VI_OWEINACT) == 0 || vp->v_usecount > 0) { VI_UNLOCK(vp); VOP_UNLOCK(vp, 0, td); vdrop(vp); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 91d2c01564ff..4d8d9ef009ce 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -593,8 +593,11 @@ ffs_read(ap) } if ((error == 0 || uio->uio_resid != orig_resid) && - (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) + (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) { + VI_LOCK(vp); ip->i_flag |= IN_ACCESS; + VI_UNLOCK(vp); + } return (error); } @@ -989,8 +992,11 @@ ffs_extread(struct vnode *vp, struct uio *uio, int ioflag) } if ((error == 0 || uio->uio_resid != orig_resid) && - (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) + (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) { + VI_LOCK(vp); ip->i_flag |= IN_ACCESS; + VI_UNLOCK(vp); + } return (error); } diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 38e2c3098c64..2fc333693b7c 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -55,6 +55,13 @@ * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. + * + * An inode may only be changed while holding either the exclusive + * vnode lock or the shared vnode lock and the vnode interlock. We use + * the latter only for "read" and "get" operations that require + * changing i_flag, or a timestamp. This locking protocol allows executing + * those operations without having to upgrade the vnode lock from shared to + * exclusive. */ struct inode { TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ @@ -119,6 +126,8 @@ struct inode { #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */ #define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ +#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the + suspension finished */ #define i_devvp i_ump->um_devvp #define i_umbufobj i_ump->um_bo diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 768860a7570a..687dc70b8408 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -128,31 +128,49 @@ ufs_itimes(vp) { struct inode *ip; struct timespec ts; + int mnt_locked; ip = VTOI(vp); + mnt_locked = 0; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) { + VI_LOCK(vp); + goto out; + } + MNT_ILOCK(vp->v_mount); /* For reading of mnt_kern_flags. */ + mnt_locked = 1; + VI_LOCK(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) - return; + goto out_unl; + if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) ip->i_flag |= IN_LAZYMOD; - else + else if (((vp->v_mount->mnt_kern_flag & + (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || + (ip->i_flag & (IN_CHANGE | IN_UPDATE))) ip->i_flag |= IN_MODIFIED; - if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { - vfs_timestamp(&ts); - if (ip->i_flag & IN_ACCESS) { - DIP_SET(ip, i_atime, ts.tv_sec); - DIP_SET(ip, i_atimensec, ts.tv_nsec); - } - if (ip->i_flag & IN_UPDATE) { - DIP_SET(ip, i_mtime, ts.tv_sec); - DIP_SET(ip, i_mtimensec, ts.tv_nsec); - ip->i_modrev++; - } - if (ip->i_flag & IN_CHANGE) { - DIP_SET(ip, i_ctime, ts.tv_sec); - DIP_SET(ip, i_ctimensec, ts.tv_nsec); - } + else if (ip->i_flag & IN_ACCESS) + ip->i_flag |= IN_LAZYACCESS; + vfs_timestamp(&ts); + if (ip->i_flag & IN_ACCESS) { + DIP_SET(ip, i_atime, ts.tv_sec); + DIP_SET(ip, i_atimensec, ts.tv_nsec); } + if (ip->i_flag & IN_UPDATE) { + DIP_SET(ip, i_mtime, ts.tv_sec); + DIP_SET(ip, i_mtimensec, ts.tv_nsec); + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) { + DIP_SET(ip, i_ctime, ts.tv_sec); + DIP_SET(ip, i_ctimensec, ts.tv_nsec); + } + + out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); + out_unl: + VI_UNLOCK(vp); + if (mnt_locked) + MNT_IUNLOCK(vp->v_mount); } /* @@ -266,11 +284,13 @@ ufs_close(ap) } */ *ap; { struct vnode *vp = ap->a_vp; + int usecount; VI_LOCK(vp); - if (vp->v_usecount > 1) - ufs_itimes(vp); + usecount = vp->v_usecount; VI_UNLOCK(vp); + if (usecount > 1) + ufs_itimes(vp); return (0); } @@ -378,8 +398,10 @@ ufs_getattr(ap) if (ip->i_ump->um_fstype == UFS1) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; + VI_LOCK(vp); vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; + VI_UNLOCK(vp); vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; @@ -390,8 +412,10 @@ ufs_getattr(ap) } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; + VI_LOCK(vp); vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; + VI_UNLOCK(vp); vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; @@ -1992,11 +2016,13 @@ ufsfifo_close(ap) } */ *ap; { struct vnode *vp = ap->a_vp; + int usecount; VI_LOCK(vp); - if (vp->v_usecount > 1) - ufs_itimes(vp); + usecount = vp->v_usecount; VI_UNLOCK(vp); + if (usecount > 1) + ufs_itimes(vp); return (fifo_specops.vop_close(ap)); }