Handle LoR in flush_pagedep_deps().

When operating in SU or SU+J mode, ffs_syncvnode() might need to
instantiate another vnode by inode number while holding the syncing
vnode's lock.  Typically this other vnode is the parent of our vnode,
but due to renames occurring right before fsync (or during fsync when
we drop the syncing vnode lock, see below) it might no longer be the
parent.

Moreover, the called function flush_pagedep_deps() needs to lock the
other vnode while holding the lock of the vnode that owns the buffer
whose dependencies are being flushed.  This creates another instance
of the same lock-order reversal (LoR) that was fixed in
softdep_sync().
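
In outline, the avoided deadlock and the resulting unlock/relock dance
(a minimal sketch only; the real helper introduced below must also
requeue the buffer's unfinished dependencies before dropping it):

	/* Holding the vp lock and the bp lock: never sleep on pvp's lock. */
	error = ffs_vgetf(mp, inum, LK_EXCLUSIVE | LK_NOWAIT, &pvp,
	    FFSV_FORCEINSMQ);
	if (error != 0) {
		BUF_UNLOCK(bp);		/* drop the buffer lock first */
		VOP_UNLOCK(vp);		/* then the vnode lock */
		/* Only now is it safe to block on the other vnode's lock. */
		error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &pvp,
		    FFSV_FORCEINSMQ);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		/* Locks were dropped: the caller must see ERELOOKUP. */
	}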

Put the generic code for safe relocking into a new SU helper,
get_parent_vp(), and use it in flush_pagedep_deps().  The code for
safely relocking two vnodes with undefined lock order was extracted
into a new vn helper, vn_lock_pair().
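
vn_lock_pair() itself is not shown in this page's hunks; a minimal
sketch of the ordering-free acquisition it encapsulates (simplified,
omitting the arguments that say which vnode is already locked):

	for (;;) {
		vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
		if (vn_lock(vp2, LK_EXCLUSIVE | LK_NOWAIT) == 0)
			break;		/* got both locks */
		/* Would deadlock: drop vp1 and retry in the other order. */
		VOP_UNLOCK(vp1);
		vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
		if (vn_lock(vp1, LK_EXCLUSIVE | LK_NOWAIT) == 0)
			break;
		VOP_UNLOCK(vp2);
	}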

Due to the call sequence
     ffs_syncvnode()->softdep_sync_buf()->flush_pagedep_deps(),
ffs_syncvnode() now indicates with ERELOOKUP that the passed vnode was
unlocked in the process, and can return ENOENT if the passed vnode was
reclaimed.  All callers of the function were inspected.
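
Callers that cannot restart a whole syscall simply redo the sync, as
the ffs_sync() hunk below does; in outline:

	do {
		error = ffs_syncvnode(vp, MNT_WAIT, 0);
	} while (error == ERELOOKUP);	/* locks were dropped; sync again */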

Because UFS namei lookups store auxiliary information about the
directory entry in the in-memory directory inode, and this information
is then used by the UFS code that creates/removes the directory entry
in the actual mutating VOPs, it is critical that the directory vnode
lock is not dropped between the lookup and the VOP.  For
softdep_prelink(), which ensures that a later link/unlink operation
can proceed without overflowing the journal, the calls were moved to a
point where it is still safe to abort the VOP, because no mutations
have been applied yet.  There, ERELOOKUP causes a restart of the whole
VFS operation (typically a VFS syscall) at top level, including the
re-lookup of the involved paths.  [Note that we already do the same
restart for failing calls to vn_start_write(), so formally this patch
does not introduce new behavior.]
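
A schematic of the top-level restart (a condensed illustration, not
code copied from the syscall layer):

	for (;;) {
		error = namei(&nd);	/* (re-)lookup the involved paths */
		if (error != 0)
			return (error);
		error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
		/* unlock and release the namei references here */
		if (error != ERELOOKUP)
			return (error);
		/* Locks were dropped inside the VOP; look it all up again. */
	}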

Similarly, unsafe calls to fsync in the snapshot creation code were
plugged.  A possible view of these failures is that it does not make
sense to continue creating a snapshot if the snapshot vnode was
reclaimed due to a forced unmount.

It is possible for the relock/ERELOOKUP situation to occur in
ffs_truncate() called from ufs_inactive().  In that case, dropping the
vnode lock is not safe.  Detect the situation with VI_DOINGINACT and
reschedule inactivation by setting VI_OWEINACT.  ufs_inactive()
rechecks VI_OWEINACT and avoids reclaiming the vnode if truncation
failed this way.

In ffs_truncate(), allocation of the EOF block for partial truncation
is re-done after the vnode is synced, since we cannot leave the buffer
locked across ffs_syncvnode().

In collaboration with:	pho
Reviewed by:	mckusick (previous version), markj
Tested by:	markj (syzkaller), pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D26136
Author:	Konstantin Belousov
Date:	2020-11-14 05:30:10 +00:00
Commit:	8a1509e442 (parent 738ea0010b)
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=367672

9 changed files with 494 additions and 106 deletions

sys/ufs/ffs/ffs_extern.h

@@ -173,6 +173,9 @@ void softdep_load_inodeblock(struct inode *);
void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *,
struct ucred *, int);
int softdep_prerename(struct vnode *, struct vnode *, struct vnode *,
struct vnode *);
int softdep_prelink(struct vnode *, struct vnode *, int);
void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int);
void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,

sys/ufs/ffs/ffs_inode.c

@@ -462,6 +462,8 @@ ffs_truncate(vp, length, flags, cred)
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
ffs_inode_bwrite(vp, bp, flags);
/*
* When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized
@@ -474,6 +476,10 @@ ffs_truncate(vp, length, flags, cred)
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
return (error);
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
ip->i_size = length;
DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn);

sys/ufs/ffs/ffs_snapshot.c

@@ -301,6 +301,8 @@ ffs_snapshot(mp, snapfile)
NDFREE(&nd, NDF_ONLY_PNBUF);
vn_finished_write(wrtmp);
vrele(nd.ni_dvp);
if (error == ERELOOKUP)
goto restart;
return (error);
}
vp = nd.ni_vp;
@@ -368,8 +370,12 @@ ffs_snapshot(mp, snapfile)
if (error)
goto out;
bawrite(nbp);
-		if (cg % 10 == 0)
-			ffs_syncvnode(vp, MNT_WAIT, 0);
+		if (cg % 10 == 0) {
+			error = ffs_syncvnode(vp, MNT_WAIT, 0);
+			/* vp possibly reclaimed if unlocked */
+			if (error != 0)
+				goto out;
+		}
}
/*
* Copy all the cylinder group maps. Although the
@@ -391,8 +397,8 @@
goto out;
error = cgaccount(cg, vp, nbp, 1);
bawrite(nbp);
-		if (cg % 10 == 0)
-			ffs_syncvnode(vp, MNT_WAIT, 0);
+		if (cg % 10 == 0 && error == 0)
+			error = ffs_syncvnode(vp, MNT_WAIT, 0);
if (error)
goto out;
}

sys/ufs/ffs/ffs_softdep.c

@@ -609,6 +609,27 @@ softdep_freework(wkhd)
panic("softdep_freework called");
}
int
softdep_prerename(fdvp, fvp, tdvp, tvp)
struct vnode *fdvp;
struct vnode *fvp;
struct vnode *tdvp;
struct vnode *tvp;
{
panic("softdep_prerename called");
}
int
softdep_prelink(dvp, vp, will_direnter)
struct vnode *dvp;
struct vnode *vp;
int will_direnter;
{
panic("softdep_prelink called");
}
#else
FEATURE(softupdates, "FFS soft-updates support");
@@ -748,7 +769,7 @@ static void unlinked_inodedep(struct mount *, struct inodedep *);
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
-		    struct diraddhd *);
+		    struct diraddhd *, struct buf *);
static int free_pagedep(struct pagedep *);
static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
@@ -925,7 +946,6 @@ static void journal_unmount(struct ufsmount *);
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
-static void softdep_prelink(struct vnode *, struct vnode *);
static void add_to_journal(struct worklist *);
static void remove_from_journal(struct worklist *);
static bool softdep_excess_items(struct ufsmount *, int);
@@ -1389,6 +1409,136 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW,
/* List of all filesystems mounted with soft updates */
static TAILQ_HEAD(, mount_softdeps) softdepmounts;
/*
* This function fetches inode inum on mount point mp. We already
* hold a locked vnode vp, and might have a locked buffer bp belonging
* to vp.
* We must not block on acquiring the new inode lock as we will get
* into a lock-order reversal with the buffer lock and possibly get a
* deadlock. Thus if we cannot instantiate the requested vnode
* without sleeping on its lock, we must unlock the vnode and the
* buffer before blocking on the vnode lock.  We return
* ERELOOKUP if we have had to unlock either the vnode or the buffer so
* that the caller can reassess its state.
*
* Top-level VFS code (for syscalls and other consumers, e.g. callers
* of VOP_FSYNC() in the syncer) checks for ERELOOKUP and restarts at
* a safe point.
*
* Since callers expect to operate on a fully constructed vnode, we
* also recheck v_data after relocking, and return ENOENT if it is NULL.
*
* If unlocking bp, we must unroll the dequeueing of its unfinished
* dependencies, and clear the scan flag, before unlocking.  If
* unlocking vp while it is under deactivation, we re-queue the
* deactivation.
*/
static int
get_parent_vp(struct vnode *vp, struct mount *mp, ino_t inum, struct buf *bp,
struct diraddhd *diraddhdp, struct diraddhd *unfinishedp,
struct vnode **rvp)
{
struct vnode *pvp;
struct diradd *dap;
int error;
bool bplocked;
ASSERT_VOP_ELOCKED(vp, "child vnode must be locked");
for (bplocked = true, pvp = NULL;;) {
error = ffs_vgetf(mp, inum, LK_EXCLUSIVE | LK_NOWAIT, &pvp,
FFSV_FORCEINSMQ);
if (error == 0) {
/*
* Since we could have unlocked vp, the inode
* number could no longer indicate a
* constructed node. In this case, we must
* restart the syscall.
*/
if (VTOI(pvp)->i_mode == 0 || !bplocked) {
if (VTOI(pvp)->i_mode == 0)
vgone(pvp);
vput(pvp);
error = ERELOOKUP;
goto out;
}
error = 0;
goto out1;
}
if (bp != NULL && bplocked) {
/*
* Requeue unfinished dependencies before
* unlocking buffer, which could make
* diraddhdp invalid.
*/
ACQUIRE_LOCK(VFSTOUFS(mp));
while ((dap = LIST_FIRST(unfinishedp)) != NULL) {
LIST_REMOVE(dap, da_pdlist);
LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
}
FREE_LOCK(VFSTOUFS(mp));
bp->b_vflags &= ~BV_SCANNED;
BUF_NOREC(bp);
BUF_UNLOCK(bp);
bplocked = false;
}
/*
* Do not drop vnode lock while inactivating. This
* would result in leaks of the VI flags and
* reclaiming of non-truncated vnode. Instead,
* re-schedule inactivation hoping that we would be
* able to sync inode later.
*/
if ((vp->v_iflag & VI_DOINGINACT) != 0) {
VI_LOCK(vp);
vp->v_iflag |= VI_OWEINACT;
VI_UNLOCK(vp);
return (ERELOOKUP);
}
VOP_UNLOCK(vp);
error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &pvp,
FFSV_FORCEINSMQ);
if (error != 0) {
MPASS(error != ERELOOKUP);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
break;
}
if (VTOI(pvp)->i_mode == 0) {
vgone(pvp);
vput(pvp);
pvp = NULL;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = ERELOOKUP;
break;
}
error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
if (error == 0)
break;
vput(pvp);
pvp = NULL;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_data == NULL) {
error = ENOENT;
break;
}
}
if (bp != NULL) {
MPASS(!bplocked);
error = ERELOOKUP;
}
if (error != 0 && pvp != NULL) {
vput(pvp);
pvp = NULL;
}
out1:
*rvp = pvp;
out:
ASSERT_VOP_ELOCKED(vp, "child vnode must be locked on return");
return (error);
}
/*
* This function cleans the worklist for a filesystem.
* Each filesystem running with soft dependencies gets its own
@@ -3096,40 +3246,84 @@ softdep_prealloc(vp, waitok)
}
/*
- * Before adjusting a link count on a vnode verify that we have sufficient
- * journal space.  If not, process operations that depend on the currently
- * locked pair of vnodes to try to flush space as the syncer, buf daemon,
- * and softdep flush threads can not acquire these locks to reclaim space.
+ * Try hard to sync all data and metadata for the vnode, and workitems
+ * flushing which might conflict with the vnode lock.  This is a
+ * helper for softdep_prerename().
 */
-static void
-softdep_prelink(dvp, vp)
-	struct vnode *dvp;
+static int
+softdep_prerename_vnode(ump, vp)
+	struct ufsmount *ump;
	struct vnode *vp;
{
-	struct ufsmount *ump;
+	int error;

-	ump = VFSTOUFS(dvp->v_mount);
-	LOCK_OWNED(ump);
-	/*
-	 * Nothing to do if we have sufficient journal space.
-	 * If we currently hold the snapshot lock, we must avoid
-	 * handling other resources that could cause deadlock.
-	 */
-	if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
-		return;
-	stat_journal_low++;
-	FREE_LOCK(ump);
-	if (vp)
-		ffs_syncvnode(vp, MNT_NOWAIT, 0);
-	ffs_syncvnode(dvp, MNT_WAIT, 0);
+	ASSERT_VOP_ELOCKED(vp, "prehandle");
+	if (vp->v_data == NULL)
+		return (0);
+	error = VOP_FSYNC(vp, MNT_WAIT, curthread);
+	if (error != 0)
+		return (error);
	ACQUIRE_LOCK(ump);
-	/* Process vp before dvp as it may create .. removes. */
-	if (vp) {
-		process_removes(vp);
-		process_truncates(vp);
-	}
+	process_removes(vp);
+	process_truncates(vp);
+	FREE_LOCK(ump);
+	return (0);
+}
+
+/*
+ * Must be called from VOP_RENAME() after all vnodes are locked.
+ * Ensures that there is enough journal space for rename.  It is
+ * sufficiently different from softdep_prelink() by having to handle
+ * four vnodes.
+ */
+int
+softdep_prerename(fdvp, fvp, tdvp, tvp)
+	struct vnode *fdvp;
+	struct vnode *fvp;
+	struct vnode *tdvp;
+	struct vnode *tvp;
+{
+	struct ufsmount *ump;
+	int error;
+
+	ump = VFSTOUFS(fdvp->v_mount);
+	if (journal_space(ump, 0))
+		return (0);
+	VOP_UNLOCK(tdvp);
+	VOP_UNLOCK(fvp);
+	if (tvp != NULL && tvp != tdvp)
+		VOP_UNLOCK(tvp);
+	error = softdep_prerename_vnode(ump, fdvp);
+	VOP_UNLOCK(fdvp);
+	if (error != 0)
+		return (error);
+	VOP_LOCK(fvp, LK_EXCLUSIVE | LK_RETRY);
+	error = softdep_prerename_vnode(ump, fvp);
+	VOP_UNLOCK(fvp);
+	if (error != 0)
+		return (error);
+	if (tdvp != fdvp) {
+		VOP_LOCK(tdvp, LK_EXCLUSIVE | LK_RETRY);
+		error = softdep_prerename_vnode(ump, tdvp);
+		VOP_UNLOCK(tdvp);
+		if (error != 0)
+			return (error);
+	}
-	process_removes(dvp);
-	process_truncates(dvp);
+	if (tvp != fvp && tvp != NULL) {
+		VOP_LOCK(tvp, LK_EXCLUSIVE | LK_RETRY);
+		error = softdep_prerename_vnode(ump, tvp);
+		VOP_UNLOCK(tvp);
+		if (error != 0)
+			return (error);
+	}
+	ACQUIRE_LOCK(ump);
	softdep_speedup(ump);
	process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
	if (journal_space(ump, 0) == 0) {
@@ -3137,6 +3331,121 @@ softdep_prelink(dvp, vp)
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
FREE_LOCK(ump);
return (ERELOOKUP);
}
/*
* Before adjusting a link count on a vnode verify that we have sufficient
* journal space. If not, process operations that depend on the currently
* locked pair of vnodes to try to flush space as the syncer, buf daemon,
* and softdep flush threads can not acquire these locks to reclaim space.
*
* Returns 0 if all owned locks are still valid and were not dropped
* in the process; otherwise it returns either an error from the sync,
* or ERELOOKUP if any of the locks were re-acquired.  In the latter
* case, the state of the vnodes cannot be relied upon and our VFS
* syscall must be restarted at top level from the lookup.
*/
int
softdep_prelink(dvp, vp, will_direnter)
struct vnode *dvp;
struct vnode *vp;
int will_direnter;
{
struct ufsmount *ump;
int error, error1;
ASSERT_VOP_ELOCKED(dvp, "prelink dvp");
if (vp != NULL)
ASSERT_VOP_ELOCKED(vp, "prelink vp");
ump = VFSTOUFS(dvp->v_mount);
/*
* Nothing to do if we have sufficient journal space.
* If we currently hold the snapshot lock, we must avoid
* handling other resources that could cause deadlock.
*
* will_direnter == 1: In case we allocated a directory block in
* an indirect block, we must prevent holes in the directory
* created if directory entries are written out of order. To
* accomplish this we fsync when we extend a directory into
* indirects. During rename it's not safe to drop the tvp
* lock so sync must be delayed until it is.
*
* This synchronous step could be removed if fsck and the
* kernel were taught to fill in sparse directories rather
* than panic.
*/
if (journal_space(ump, 0) || (vp != NULL && IS_SNAPSHOT(VTOI(vp)))) {
error = 0;
if (will_direnter && (vp == NULL || !IS_SNAPSHOT(VTOI(vp)))) {
if (vp != NULL)
VOP_UNLOCK(vp);
error = ffs_syncvnode(dvp, MNT_WAIT, 0);
if (vp != NULL) {
error1 = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
if (error1 != 0) {
vn_lock_pair(dvp, true, vp, false);
if (error == 0)
error = ERELOOKUP;
} else if (vp->v_data == NULL) {
error = ERELOOKUP;
}
}
}
return (error);
}
stat_journal_low++;
if (vp != NULL) {
VOP_UNLOCK(dvp);
ffs_syncvnode(vp, MNT_NOWAIT, 0);
vn_lock_pair(dvp, false, vp, true);
if (dvp->v_data == NULL)
return (ERELOOKUP);
}
if (vp != NULL)
VOP_UNLOCK(vp);
ffs_syncvnode(dvp, MNT_WAIT, 0);
VOP_UNLOCK(dvp);
/* Process vp before dvp as it may create .. removes. */
if (vp != NULL) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_data == NULL) {
vn_lock_pair(dvp, false, vp, true);
return (ERELOOKUP);
}
ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
FREE_LOCK(ump);
VOP_UNLOCK(vp);
}
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
if (dvp->v_data == NULL) {
vn_lock_pair(dvp, true, vp, false);
return (ERELOOKUP);
}
ACQUIRE_LOCK(ump);
process_removes(dvp);
process_truncates(dvp);
VOP_UNLOCK(dvp);
softdep_speedup(ump);
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
FREE_LOCK(ump);
vn_lock_pair(dvp, false, vp, false);
return (ERELOOKUP);
}
static void
@@ -4742,7 +5051,6 @@ softdep_setup_create(dp, ip)
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
("softdep_setup_create: No addref structure present."));
}
-	softdep_prelink(dvp, NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4777,7 +5085,6 @@ softdep_setup_dotdot_link(dp, ip)
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
-	softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4808,7 +5115,6 @@ softdep_setup_link(dp, ip)
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
-	softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4858,7 +5164,6 @@ softdep_setup_mkdir(dp, ip)
if (DOINGSUJ(dvp))
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
&dotdotaddref->ja_ref, if_deps);
-	softdep_prelink(ITOV(dp), NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4879,7 +5184,6 @@ softdep_setup_rmdir(dp, ip)
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
-	softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4900,7 +5204,6 @@ softdep_setup_unlink(dp, ip)
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
-	softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -12622,25 +12925,12 @@ softdep_fsync(vp)
* for details on possible races.
*/
FREE_LOCK(ump);
-	if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
-	    FFSV_FORCEINSMQ)) {
-		/*
-		 * Unmount cannot proceed after unlock because
-		 * caller must have called vn_start_write().
-		 */
-		VOP_UNLOCK(vp);
-		error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE,
-		    &pvp, FFSV_FORCEINSMQ);
-		MPASS(VTOI(pvp)->i_mode != 0);
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-		if (VN_IS_DOOMED(vp)) {
-			if (error == 0)
-				vput(pvp);
-			error = ENOENT;
-		}
-		if (error != 0)
-			return (error);
-	}
+	error = get_parent_vp(vp, mp, parentino, NULL, NULL, NULL,
+	    &pvp);
+	if (error == ERELOOKUP)
+		error = 0;
+	if (error != 0)
+		return (error);
/*
* All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
* that are contained in direct blocks will be resolved by
@@ -12964,9 +13254,11 @@ softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
for (i = 0; i < DAHASHSZ; i++) {
if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
continue;
-			if ((error = flush_pagedep_deps(vp, wk->wk_mp,
-			    &pagedep->pd_diraddhd[i]))) {
-				BUF_NOREC(bp);
+			error = flush_pagedep_deps(vp, wk->wk_mp,
+			    &pagedep->pd_diraddhd[i], bp);
+			if (error != 0) {
+				if (error != ERELOOKUP)
+					BUF_NOREC(bp);
goto out_unlock;
}
}
@@ -13200,10 +13492,11 @@ flush_newblk_dep(vp, mp, lbn)
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
*/
static int
-flush_pagedep_deps(pvp, mp, diraddhdp)
+flush_pagedep_deps(pvp, mp, diraddhdp, locked_bp)
	struct vnode *pvp;
	struct mount *mp;
	struct diraddhd *diraddhdp;
+	struct buf *locked_bp;
{
struct inodedep *inodedep;
struct inoref *inoref;
@@ -13270,10 +13563,10 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
}
if (dap->da_state & MKDIR_BODY) {
FREE_LOCK(ump);
-		if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
-		    FFSV_FORCEINSMQ)))
+		error = get_parent_vp(pvp, mp, inum, locked_bp,
+		    diraddhdp, &unfinished, &vp);
+		if (error != 0)
			break;
-		MPASS(VTOI(vp)->i_mode != 0);
error = flush_newblk_dep(vp, mp, 0);
/*
* If we still have the dependency we might need to
@@ -13335,10 +13628,10 @@
*/
if (dap == LIST_FIRST(diraddhdp)) {
FREE_LOCK(ump);
-		if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
-		    FFSV_FORCEINSMQ)))
+		error = get_parent_vp(pvp, mp, inum, locked_bp,
+		    diraddhdp, &unfinished, &vp);
+		if (error != 0)
			break;
-		MPASS(VTOI(vp)->i_mode != 0);
error = ffs_update(vp, 1);
vput(vp);
if (error)

sys/ufs/ffs/ffs_vfsops.c

@@ -1861,8 +1861,14 @@ ffs_sync(mp, waitfor)
#ifdef QUOTA
qsyncvp(vp);
#endif
-		if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
-			allerror = error;
+		for (;;) {
+			error = ffs_syncvnode(vp, waitfor, 0);
+			if (error == ERELOOKUP)
+				continue;
+			if (error != 0)
+				allerror = error;
+			break;
+		}
vput(vp);
}
/*

sys/ufs/ffs/ffs_vnops.c

@@ -253,7 +253,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
struct buf *bp, *nbp;
ufs_lbn_t lbn;
int error, passes;
-	bool still_dirty, wait;
+	bool still_dirty, unlocked, wait;
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
@@ -277,6 +277,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
error = 0;
passes = 0;
wait = false; /* Always do an async pass first. */
unlocked = false;
lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
BO_LOCK(bo);
loop:
@@ -325,6 +326,26 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
if (!LIST_EMPTY(&bp->b_dep) &&
(error = softdep_sync_buf(vp, bp,
wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
/*
* Lock order conflict, buffer was already unlocked,
* and vnode possibly unlocked.
*/
if (error == ERELOOKUP) {
if (vp->v_data == NULL)
return (EBADF);
unlocked = true;
if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
(error = softdep_sync_metadata(vp)) != 0) {
if (ffs_fsfail_cleanup(ump, error))
error = 0;
return (unlocked && error == 0 ?
ERELOOKUP : error);
}
/* Re-evaluate inode size */
lbn = lblkno(ITOFS(ip), (ip->i_size +
ITOFS(ip)->fs_bsize - 1));
goto next;
}
/* I/O error. */
if (error != EBUSY) {
BUF_UNLOCK(bp);
@@ -361,9 +382,11 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
if (waitfor != MNT_WAIT) {
BO_UNLOCK(bo);
if ((flags & NO_INO_UPDT) != 0)
-			return (0);
-		else
-			return (ffs_update(vp, 0));
+			return (unlocked ? ERELOOKUP : 0);
+		error = ffs_update(vp, 0);
+		if (error == 0 && unlocked)
+			error = ERELOOKUP;
+		return (error);
}
/* Drain IO to see if we're done. */
bufobj_wwait(bo, 0, 0);
@@ -419,6 +442,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
} else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
error = ffs_update(vp, 1);
}
if (error == 0 && unlocked)
error = ERELOOKUP;
return (error);
}

sys/ufs/ufs/ufs_inode.c

@@ -166,7 +166,8 @@ ufs_inactive(ap)
isize += ip->i_din2->di_extsize;
if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);
-	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
+	if (ip->i_nlink <= 0 && ip->i_mode != 0 && !UFS_RDONLY(ip) &&
+	    (vp->v_iflag & VI_OWEINACT) == 0) {
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, FORCE);
@@ -207,10 +208,12 @@
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
-	if (ip->i_mode == 0)
+	if (ip->i_mode == 0 && (vp->v_iflag & VI_OWEINACT) == 0)
vrecycle(vp);
if (mp != NULL)
vn_finished_secondary_write(mp);
if (error == ERELOOKUP)
error = 0;
return (error);
}

sys/ufs/ufs/ufs_lookup.c

@@ -961,27 +961,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename)
bdwrite(bp);
if ((dp->i_flag & IN_NEEDSYNC) == 0)
return (UFS_UPDATE(dvp, 0));
-		/*
-		 * We have just allocated a directory block in an
-		 * indirect block.  We must prevent holes in the
-		 * directory created if directory entries are
-		 * written out of order.  To accomplish this we
-		 * fsync when we extend a directory into indirects.
-		 * During rename it's not safe to drop the tvp lock
-		 * so sync must be delayed until it is.
-		 *
-		 * This synchronous step could be removed if fsck and
-		 * the kernel were taught to fill in sparse
-		 * directories rather than panic.
-		 */
-		if (isrename)
-			return (0);
-		if (tvp != NULL)
-			VOP_UNLOCK(tvp);
-		(void) VOP_FSYNC(dvp, MNT_WAIT, td);
-		if (tvp != NULL)
-			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
-		return (error);
+		return (0);
}
if (DOINGASYNC(dvp)) {
bdwrite(bp);

sys/ufs/ufs/ufs_vnops.c

@@ -1006,10 +1006,16 @@ ufs_remove(ap)
td = curthread;
ip = VTOI(vp);
if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
-	    (VTOI(dvp)->i_flags & APPEND)) {
-		error = EPERM;
-		goto out;
-	}
+	    (VTOI(dvp)->i_flags & APPEND))
+		return (EPERM);
+
+	if (DOINGSOFTDEP(dvp)) {
+		error = softdep_prelink(dvp, vp, true);
+		if (error != 0) {
+			MPASS(error == ERELOOKUP);
+			return (error);
+		}
}
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
@@ -1030,7 +1036,6 @@
(void) VOP_FSYNC(dvp, MNT_WAIT, td);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
}
-out:
return (error);
}
@@ -1067,6 +1072,15 @@ ufs_link(ap)
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_link: no name");
#endif
if (DOINGSOFTDEP(tdvp)) {
error = softdep_prelink(tdvp, vp, true);
if (error != 0) {
MPASS(error == ERELOOKUP);
return (error);
}
}
if (VTOI(tdvp)->i_effnlink < 2) {
print_bad_link_count("ufs_link", tdvp);
error = EINVAL;
@@ -1089,6 +1103,7 @@
error = EPERM;
goto out;
}
ip->i_effnlink++;
ip->i_nlink++;
DIP_SET(ip, i_nlink, ip->i_nlink);
@@ -1129,6 +1144,15 @@ ufs_whiteout(ap)
struct direct newdir;
int error = 0;
if (DOINGSOFTDEP(dvp) && (ap->a_flags == CREATE ||
ap->a_flags == DELETE)) {
error = softdep_prelink(dvp, NULL, true);
if (error != 0) {
MPASS(error == ERELOOKUP);
return (error);
}
}
switch (ap->a_flags) {
case LOOKUP:
/* 4.4 format directories support whiteout operations */
@@ -1338,6 +1362,18 @@ ufs_rename(ap)
goto relock;
}
}
if (DOINGSOFTDEP(fdvp)) {
error = softdep_prerename(fdvp, fvp, tdvp, tvp);
if (error != 0) {
if (error == ERELOOKUP) {
atomic_add_int(&rename_restarts, 1);
goto relock;
}
goto releout;
}
}
fdp = VTOI(fdvp);
fip = VTOI(fvp);
tdp = VTOI(tdvp);
@@ -1649,8 +1685,10 @@
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
-		error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
-		    (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+		do {
+			error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
+			    (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+		} while (error == ERELOOKUP);
if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error))
vn_printf(tdvp,
"ufs_rename: failed to truncate, error %d\n",
@@ -1668,8 +1706,11 @@
*/
error = 0;
}
-	if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
-		error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+	if (error == 0 && tdp->i_flag & IN_NEEDSYNC) {
+		do {
+			error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+		} while (error == ERELOOKUP);
+	}
vput(tdvp);
return (error);
@@ -1918,6 +1959,7 @@ ufs_mkdir(ap)
}
dmode = vap->va_mode & 0777;
dmode |= IFDIR;
/*
* Must simulate part of ufs_makeinode here to acquire the inode,
* but not have it entered in the parent directory. The entry is
@@ -1928,6 +1970,15 @@
error = EINVAL;
goto out;
}
if (DOINGSOFTDEP(dvp)) {
error = softdep_prelink(dvp, NULL, true);
if (error != 0) {
MPASS(error == ERELOOKUP);
return (error);
}
}
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
@@ -2184,6 +2235,14 @@ ufs_rmdir(ap)
error = EINVAL;
goto out;
}
if (DOINGSOFTDEP(dvp)) {
error = softdep_prelink(dvp, vp, false);
if (error != 0) {
MPASS(error == ERELOOKUP);
return (error);
}
}
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
@@ -2703,6 +2762,13 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
print_bad_link_count(callfunc, dvp);
return (EINVAL);
}
if (DOINGSOFTDEP(dvp)) {
error = softdep_prelink(dvp, NULL, true);
if (error != 0) {
MPASS(error == ERELOOKUP);
return (error);
}
}
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);