vfs: vn_dir_next_dirent(): Simplify interface and harden

Simplify the old interface (one less argument, simpler termination test)
and add documentation about it. Add more sanity checks (mostly under
INVARIANTS, but also in the general case to prevent infinite
loops). Drop the explicit test on minimum directory entry size (without
INVARIANTS).

Deal with the impacts in callers (dirent_exists() and vop_stdvptocnp()).
dirent_exists() has been simplified a bit, preserving the exact same
semantics but for the return code whose meaning has been reversed (0 now
means the entry exists, ENOENT that it doesn't and other values are
genuine errors). While here, suppress gratuitous casts of malloc return
values.

vn_dir_next_dirent() has been tested by a 'make -j4 buildkernel' with a
temporary modification to the VFS cache causing vn_vptocnp() to always
call VOP_VPTOCNP() and finally vop_stdvptocnp() (observed with temporary
debug counters).

Export new _GENERIC_MINDIRSIZ and _GENERIC_MAXDIRSIZ on __BSD_VISIBLE,
and GENERIC_MINDIRSIZ and GENERIC_MAXDIRSIZ on _KERNEL.

Reviewed by:	kib
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D39764
This commit is contained in:
Olivier Certner 2023-04-24 10:25:15 +02:00 committed by Konstantin Belousov
parent 6bce3f23d0
commit 3d8450db4c
4 changed files with 187 additions and 74 deletions

View File

@ -276,49 +276,56 @@ vop_nostrategy (struct vop_strategy_args *ap)
}
/*
* Check if a named file exists in a given directory vnode.
* Check if a named file exists in a given directory vnode
*
* Returns 0 if the file exists, ENOENT if it doesn't, or errors returned by
* vn_dir_next_dirent().
*/
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
char *dirbuf, *cpos;
int error, eofflag, dirbuflen, len, found;
char *dirbuf;
int error, eofflag;
size_t dirbuflen, len;
off_t off;
struct dirent *dp;
struct vattr va;
KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
ASSERT_VOP_LOCKED(vp, "vnode not locked");
KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
found = 0;
error = VOP_GETATTR(vp, &va, td->td_ucred);
if (error)
return (found);
if (error != 0)
return (error);
dirbuflen = DEV_BSIZE;
dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
if (dirbuflen < va.va_blocksize)
dirbuflen = va.va_blocksize;
dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
off = 0;
len = 0;
do {
error = vn_dir_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
&cpos, &len, &eofflag, td);
if (error)
off = 0;
eofflag = 0;
for (;;) {
error = vn_dir_next_dirent(vp, td, dirbuf, dirbuflen,
&dp, &len, &off, &eofflag);
if (error != 0)
goto out;
if (len == 0)
break;
if (dp->d_type != DT_WHT && dp->d_fileno != 0 &&
strcmp(dp->d_name, dirname) == 0) {
found = 1;
strcmp(dp->d_name, dirname) == 0)
goto out;
}
} while (len > 0 || !eofflag);
}
error = ENOENT;
out:
free(dirbuf, M_TEMP);
return (found);
return (error);
}
int
@ -672,27 +679,24 @@ vop_stdvptofh(struct vop_vptofh_args *ap)
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
struct vnode *vp = ap->a_vp;
struct vnode **dvp = ap->a_vpp;
struct ucred *cred;
struct vnode *const vp = ap->a_vp;
struct vnode **const dvp = ap->a_vpp;
char *buf = ap->a_buf;
size_t *buflen = ap->a_buflen;
char *dirbuf, *cpos;
int i, error, eofflag, dirbuflen, flags, locked, len, covered;
char *dirbuf;
int i = *buflen;
int error = 0, covered = 0;
int eofflag, flags, locked;
size_t dirbuflen, len;
off_t off;
ino_t fileno;
struct vattr va;
struct nameidata nd;
struct thread *td;
struct thread *const td = curthread;
struct ucred *const cred = td->td_ucred;
struct dirent *dp;
struct vnode *mvp;
i = *buflen;
error = 0;
covered = 0;
td = curthread;
cred = td->td_ucred;
if (vp->v_type != VDIR)
return (ENOENT);
@ -729,31 +733,38 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
fileno = va.va_fileid;
dirbuflen = DEV_BSIZE;
dirbuflen = MAX(DEV_BSIZE, GENERIC_MAXDIRSIZ);
if (dirbuflen < va.va_blocksize)
dirbuflen = va.va_blocksize;
dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);
dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
if ((*dvp)->v_type != VDIR) {
error = ENOENT;
goto out;
}
off = 0;
len = 0;
do {
off = 0;
eofflag = 0;
for (;;) {
/* call VOP_READDIR of parent */
error = vn_dir_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
&cpos, &len, &eofflag, td);
if (error)
error = vn_dir_next_dirent(*dvp, td,
dirbuf, dirbuflen, &dp, &len, &off, &eofflag);
if (error != 0)
goto out;
if (len == 0) {
error = ENOENT;
goto out;
}
if ((dp->d_type != DT_WHT) &&
(dp->d_fileno == fileno)) {
if (covered) {
VOP_UNLOCK(*dvp);
vn_lock(mvp, LK_SHARED | LK_RETRY);
if (dirent_exists(mvp, dp->d_name, td)) {
if (dirent_exists(mvp, dp->d_name, td) == 0) {
error = ENOENT;
VOP_UNLOCK(mvp);
vn_lock(*dvp, LK_SHARED | LK_RETRY);
@ -776,8 +787,7 @@ vop_stdvptocnp(struct vop_vptocnp_args *ap)
}
goto out;
}
} while (len > 0 || !eofflag);
error = ENOENT;
}
out:
free(dirbuf, M_TEMP);

View File

@ -3737,22 +3737,112 @@ vn_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
return (error);
}
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)
/*
* Keep this assert as long as sizeof(struct dirent) is used as the maximum
* entry size.
*/
_Static_assert(_GENERIC_MAXDIRSIZ == sizeof(struct dirent),
"'struct dirent' size must be a multiple of its alignment "
"(see _GENERIC_DIRLEN())");
/*
* Returns successive directory entries through some caller's provided buffer
*
* This function automatically refills the provided buffer with calls to
* VOP_READDIR() (after MAC permission checks).
*
* 'td' is used for credentials and passed to uiomove(). 'dirbuf' is the
* caller's buffer to fill and 'dirbuflen' its allocated size. 'dirbuf' must be
* properly aligned to access 'struct dirent' structures and 'dirbuflen' must
* be greater than GENERIC_MAXDIRSIZ to avoid VOP_READDIR() returning EINVAL
* (the latter is not a strong guarantee (yet); but EINVAL will always be
* returned if this requirement is not verified). '*dpp' points to the current
* directory entry in the buffer and '*len' contains the remaining valid bytes
* in 'dirbuf' after 'dpp' (including the pointed entry).
*
* At first call (or when restarting the read), '*len' must have been set to 0,
* '*off' to 0 (or any valid start offset) and '*eofflag' to 0. There are no
* more entries as soon as '*len' is 0 after a call that returned 0. Calling
* again this function after such a condition is considered an error and EINVAL
* will be returned. Other possible error codes are those of VOP_READDIR(),
* EINTEGRITY if the returned entries do not pass coherency tests, or EINVAL
* (bad call). All errors are unrecoverable, i.e., the state ('*len', '*off'
* and '*eofflag') must be re-initialized before a subsequent call. On error or
* at end of directory, '*dpp' is reset to NULL.
*
* '*len', '*off' and '*eofflag' are internal state the caller should not
* tamper with except as explained above. '*off' is the next directory offset
* to read from to refill the buffer. '*eofflag' is set to 0 or 1 by the last
* internal call to VOP_READDIR() that returned without error, indicating
* whether it reached the end of the directory, and to 2 by this function after
* all entries have been read.
*/
int
vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
int dirbuflen, off_t *off, char **cpos, int *len,
int *eofflag, struct thread *td)
vn_dir_next_dirent(struct vnode *vp, struct thread *td,
char *dirbuf, size_t dirbuflen,
struct dirent **dpp, size_t *len, off_t *off, int *eofflag)
{
int error, reclen;
struct dirent *dp = NULL;
int reclen;
int error;
struct uio uio;
struct iovec iov;
struct dirent *dp;
KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));
ASSERT_VOP_LOCKED(vp, "vnode not locked");
VNASSERT(vp->v_type == VDIR, vp, ("vnode is not a directory"));
MPASS2((uintptr_t)dirbuf < (uintptr_t)dirbuf + dirbuflen,
"Address space overflow");
if (__predict_false(dirbuflen < GENERIC_MAXDIRSIZ)) {
/* Don't take any chances in this case */
error = EINVAL;
goto out;
}
if (*len != 0) {
dp = *dpp;
/*
* The caller continued to call us after an error (we set dp to
* NULL in a previous iteration). Bail out right now.
*/
if (__predict_false(dp == NULL))
return (EINVAL);
MPASS(*len <= dirbuflen);
MPASS2((uintptr_t)dirbuf <= (uintptr_t)dp &&
(uintptr_t)dp + *len <= (uintptr_t)dirbuf + dirbuflen,
"Filled range not inside buffer");
reclen = dp->d_reclen;
if (reclen >= *len) {
/* End of buffer reached */
*len = 0;
} else {
dp = (struct dirent *)((char *)dp + reclen);
*len -= reclen;
}
}
if (*len == 0) {
dp = NULL;
/* Have to refill. */
switch (*eofflag) {
case 0:
break;
case 1:
/* Nothing more to read. */
*eofflag = 2; /* Remember the caller reached EOF. */
goto success;
default:
/* The caller didn't test for EOF. */
error = EINVAL;
goto out;
}
iov.iov_base = dirbuf;
iov.iov_len = dirbuflen;
@ -3764,40 +3854,50 @@ vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
uio.uio_rw = UIO_READ;
uio.uio_td = td;
*eofflag = 0;
#ifdef MAC
error = mac_vnode_check_readdir(td->td_ucred, vp);
if (error == 0)
#endif
error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
NULL, NULL);
if (error)
return (error);
NULL, NULL);
if (error != 0)
goto out;
*len = dirbuflen - uio.uio_resid;
*off = uio.uio_offset;
*cpos = dirbuf;
*len = (dirbuflen - uio.uio_resid);
if (*len == 0) {
/* Sanity check on INVARIANTS. */
MPASS(*eofflag != 0);
*eofflag = 1;
goto success;
}
if (*len == 0)
return (ENOENT);
/*
* Normalize the flag returned by VOP_READDIR(), since we use 2
* as a sentinel value.
*/
if (*eofflag != 0)
*eofflag = 1;
dp = (struct dirent *)dirbuf;
}
dp = (struct dirent *)(*cpos);
reclen = dp->d_reclen;
if (__predict_false(*len < GENERIC_MINDIRSIZ ||
dp->d_reclen < GENERIC_MINDIRSIZ)) {
error = EINTEGRITY;
dp = NULL;
goto out;
}
success:
error = 0;
out:
*dpp = dp;
/* check for malformed directory.. */
if (reclen < DIRENT_MINSIZE)
return (EINVAL);
*cpos += reclen;
*len -= reclen;
return (0);
return (error);
}
static u_long vn_lock_pair_pause_cnt;
SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD,
&vn_lock_pair_pause_cnt, 0,

View File

@ -122,11 +122,14 @@ struct freebsd11_dirent {
#define _GENERIC_DIRLEN(namlen) \
((__offsetof(struct dirent, d_name) + (namlen) + 1 + 7) & ~7)
#define _GENERIC_DIRSIZ(dp) _GENERIC_DIRLEN((dp)->d_namlen)
#define _GENERIC_MINDIRSIZ _GENERIC_DIRLEN(1) /* Name must not be empty */
#define _GENERIC_MAXDIRSIZ _GENERIC_DIRLEN(MAXNAMLEN)
#endif /* __BSD_VISIBLE */
#ifdef _KERNEL
#define GENERIC_DIRSIZ(dp) _GENERIC_DIRSIZ(dp)
#define GENERIC_MINDIRSIZ _GENERIC_MINDIRSIZ
#define GENERIC_MAXDIRSIZ _GENERIC_MAXDIRSIZ
/*
* Ensure that padding bytes are zeroed and that the name is NUL-terminated.
*/

View File

@ -1099,9 +1099,9 @@ void vfs_hash_remove(struct vnode *vp);
int vfs_kqfilter(struct vop_kqfilter_args *);
struct dirent;
int vn_dir_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
int dirbuflen, off_t *off, char **cpos, int *len,
int *eofflag, struct thread *td);
int vn_dir_next_dirent(struct vnode *vp, struct thread *td,
char *dirbuf, size_t dirbuflen,
struct dirent **dpp, size_t *len, off_t *off, int *eofflag);
int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off);
int vfs_emptydir(struct vnode *vp);