Allow some VOPs to return ERELOOKUP to indicate VFS operation restart at top level.

Restart syscalls and some sync operations when the filesystem indicates
the ERELOOKUP condition, mostly for VOPs operating on metadata.  In
particular, lookup results cached in the inode/v_data are no longer
valid and need recalculating.  Right now this should be a nop.

Assert that ERELOOKUP is caught everywhere and not returned to
userspace, by asserting that td_errno != ERELOOKUP on the syscall
return path.

In collaboration with:	pho
Reviewed by:	mckusick (previous version), markj
Tested by:	markj (syzkaller), pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D26136
This commit is contained in:
Konstantin Belousov 2020-11-13 09:42:32 +00:00
parent 7cde2ec4fd
commit 441eb16a95
4 changed files with 39 additions and 7 deletions

View File

@ -217,6 +217,8 @@ syscallret(struct thread *td)
KASSERT((td->td_pflags & TDP_FORKING) == 0,
("fork() did not clear TDP_FORKING upon completion"));
KASSERT(td->td_errno != ERELOOKUP,
("ERELOOKUP not consumed syscall %d", td->td_sa.code));
p = td->td_proc;
sa = &td->td_sa;

View File

@ -671,6 +671,8 @@ restart:
vput(nd.ni_dvp);
if (error) {
vn_finished_write(mp);
if (error == ERELOOKUP)
goto restart;
goto error;
}
vp = nd.ni_vp;

View File

@ -1937,7 +1937,10 @@ bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
}
if (bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
if ((error = BO_SYNC(bo, MNT_WAIT)) != 0)
do {
error = BO_SYNC(bo, MNT_WAIT);
} while (error == ERELOOKUP);
if (error != 0)
return (error);
/*
* XXX We could save a lock/unlock if this was only
@ -3678,7 +3681,9 @@ loop:
vm_object_page_clean(vp->v_object, 0, 0, 0);
VM_OBJECT_WUNLOCK(vp->v_object);
}
error = VOP_FSYNC(vp, MNT_WAIT, td);
do {
error = VOP_FSYNC(vp, MNT_WAIT, td);
} while (error == ERELOOKUP);
if (error != 0) {
VOP_UNLOCK(vp);
vdrop(vp);

View File

@ -1384,6 +1384,8 @@ restart:
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
if (error == ERELOOKUP)
goto restart;
return (error);
}
@ -1470,6 +1472,8 @@ out:
vput(nd.ni_dvp);
vn_finished_write(mp);
NDFREE(&nd, NDF_ONLY_PNBUF);
if (error == ERELOOKUP)
goto restart;
return (error);
}
@ -1568,7 +1572,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
} while (error == EAGAIN);
} while (error == EAGAIN || error == ERELOOKUP);
return (error);
}
@ -1741,6 +1745,8 @@ out2:
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
if (error == ERELOOKUP)
goto restart;
out:
if (segflg != UIO_SYSSPACE)
uma_zfree(namei_zone, tmppath);
@ -1791,6 +1797,8 @@ restart:
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
if (error == ERELOOKUP)
goto restart;
return (error);
}
@ -1937,6 +1945,8 @@ out:
vrele(vp);
else
vput(vp);
if (error == ERELOOKUP)
goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@ -3395,7 +3405,8 @@ kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
int error;
if (length < 0)
return(EINVAL);
return (EINVAL);
retry:
NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
if ((error = namei(&nd)) != 0)
return (error);
@ -3424,6 +3435,8 @@ kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
vn_finished_write(mp);
vn_rangelock_unlock(vp, rl_cookie);
vrele(vp);
if (error == ERELOOKUP)
goto retry;
return (error);
}
@ -3479,6 +3492,7 @@ kern_fsync(struct thread *td, int fd, bool fullsync)
if (!fullsync)
/* XXXKIB: compete outstanding aio writes */;
#endif
retry:
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error != 0)
goto drop;
@ -3498,6 +3512,8 @@ kern_fsync(struct thread *td, int fd, bool fullsync)
error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
VOP_UNLOCK(vp);
vn_finished_write(mp);
if (error == ERELOOKUP)
goto retry;
drop:
fdrop(fp, td);
return (error);
@ -3679,7 +3695,7 @@ again:
* are links to the same vnode), then there is nothing to do.
*/
if (fvp == tvp)
error = -1;
error = ERESTART;
#ifdef MAC
else
error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
@ -3708,8 +3724,10 @@ out:
out1:
if (fromnd.ni_startdir)
vrele(fromnd.ni_startdir);
if (error == -1)
if (error == ERESTART)
return (0);
if (error == ERELOOKUP)
goto again;
return (error);
}
@ -3803,6 +3821,8 @@ out:
if (error == 0)
vput(nd.ni_vp);
vn_finished_write(mp);
if (error == ERELOOKUP)
goto restart;
return (error);
}
@ -3903,6 +3923,8 @@ out:
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
if (error == ERELOOKUP)
goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@ -4416,7 +4438,8 @@ kern_fhlinkat(struct thread *td, int fd, const char *path,
if (error != 0)
return (error);
VOP_UNLOCK(vp);
} while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
error = kern_linkat_vp(td, vp, fd, path, pathseg);
} while (error == EAGAIN || error == ERELOOKUP);
return (error);
}