vfs: add realpathat syscall

realpath(3) is used a lot e.g., by clang and is a major source of getcwd
and fstatat calls. This can be done more efficiently in the kernel.

This works by performing a regular lookup while saving the name and found
parent directory. If the terminal vnode is a directory we can resolve it using
usual means. Otherwise we can use the name saved by lookup and resolve the
parent.

See the review for sample syscall counts.

Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D23574
This commit is contained in:
Mateusz Guzik 2020-02-20 16:58:19 +00:00
parent 43623d0ce7
commit 0573d0a9b8
9 changed files with 205 additions and 28 deletions

View File

@ -42,14 +42,19 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "un-namespace.h"
#include "libc_private.h"
extern int __realpathat(int fd, const char *path, char *buf, size_t size,
int flags);
/*
* Find the real name of path, by removing all ".", ".." and symlink
* components. Returns (resolved) on success, or (NULL) on failure,
* in which case the path which caused trouble is left in (resolved).
*/
static char *
static char * __noinline
realpath1(const char *path, char *resolved)
{
struct stat sb;
@ -223,6 +228,10 @@ realpath(const char * __restrict path, char * __restrict resolved)
if (resolved == NULL)
return (NULL);
}
if (__getosreldate() >= 1300080) {
if (__realpathat(AT_FDCWD, path, resolved, PATH_MAX, 0) == 0)
return (resolved);
}
res = realpath1(path, resolved);
if (res == NULL)
free(m);

View File

@ -49,7 +49,7 @@ _libc_other_objects= sigsetjmp lstat stat fstat fstatat fstatfs syscall \
sysarch __sysctl issetugid __getcwd utrace \
thr_self thr_kill pread mmap lseek _exit _fstat _fstatat _fstatfs \
getdirentries _getdirentries _close _fcntl _open _openat _read \
_sigprocmask _write readlink _setjmp setjmp setjmperr
_sigprocmask _write readlink __realpathat _setjmp setjmp setjmperr
# Finally add additional architecture-dependent libc dependencies

View File

@ -657,6 +657,7 @@
#define AUE_LGETUUID 43261 /* CADETS. */
#define AUE_EXECVEAT 43262 /* FreeBSD/Linux. */
#define AUE_SHMRENAME 43263 /* FreeBSD-specific. */
#define AUE_REALPATHAT 43264 /* FreeBSD-specific. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the

View File

@ -1160,5 +1160,7 @@
572 AUE_SHMRENAME NOPROTO { int shm_rename(const char *path_from, \
const char *path_to, int flags); }
573 AUE_NULL NOPROTO { int sigfastblock(int cmd, uint32_t *ptr); }
574 AUE_REALPATHAT NOPROTO { int __realpathat(int fd, const char *path, \
char *buf, size_t size, int flags); }
; vim: syntax=off

View File

@ -3218,6 +3218,15 @@
_Inout_opt_ uint32_t *ptr
);
}
574 AUE_REALPATHAT STD {
int __realpathat(
int fd,
_In_z_ const char *path,
_Out_writes_z_(size) char *buf,
size_t size,
int flags
);
}
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/counter.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
@ -387,8 +388,12 @@ STATNODE_COUNTER(shrinking_skipped,
"Number of times shrinking was already in progress");
static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
static int vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
char **freebuf, size_t *buflen);
static int vn_fullpath_any(struct thread *td, struct vnode *vp, struct vnode *rdir,
char *buf, char **retbuf, size_t *buflen);
static int vn_fullpath_dir(struct thread *td, struct vnode *vp, struct vnode *rdir,
char *buf, char **retbuf, size_t *len, bool slash_prefixed, size_t addend);
static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
@ -2201,7 +2206,7 @@ vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen)
rdir = fdp->fd_rdir;
vrefact(rdir);
FILEDESC_SUNLOCK(fdp);
error = vn_fullpath1(td, cdir, rdir, buf, retbuf, buflen);
error = vn_fullpath_any(td, cdir, rdir, buf, retbuf, buflen);
vrele(rdir);
vrele(cdir);
@ -2212,6 +2217,37 @@ vn_getcwd(struct thread *td, char *buf, char **retbuf, size_t *buflen)
return (error);
}
/*
 * Kernel side of __realpathat(2): look up "path" relative to "fd" and copy
 * the resolved absolute path out to "buf".
 *
 * td      - calling thread.
 * fd      - directory descriptor handed to the lookup.
 * path    - path to resolve; lives in the address space named by pathseg.
 * buf     - destination passed to copyout().
 * size    - capacity of buf; vn_fullpath_hardlink() receives it by pointer
 *           and the value it leaves behind is used as the copyout length.
 * flags   - must be 0, anything else yields EINVAL.
 * pathseg - segment the path string resides in.
 *
 * Returns 0 on success or the error reported by namei(),
 * vn_fullpath_hardlink() or copyout().
 */
static int
kern___realpathat(struct thread *td, int fd, const char *path, char *buf,
    size_t size, int flags, enum uio_seg pathseg)
{
	struct nameidata nd;
	char *resolved, *allocated;
	int error;

	/* No flags are accepted at this point. */
	if (flags != 0)
		return (EINVAL);

	/*
	 * SAVENAME | WANTPARENT are what vn_fullpath_hardlink() expects the
	 * lookup to have been done with.
	 */
	NDINIT_ATRIGHTS(&nd, LOOKUP,
	    FOLLOW | SAVENAME | WANTPARENT | AUDITVNODE1, pathseg, path, fd,
	    &cap_fstat_rights, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	error = vn_fullpath_hardlink(td, &nd, &resolved, &allocated, &size);
	if (error != 0) {
		/* Nothing to free here: no buffer is handed back on failure. */
		NDFREE(&nd, 0);
		return (error);
	}

	error = copyout(resolved, buf, size);
	free(allocated, M_TEMP);
	NDFREE(&nd, 0);
	return (error);
}
int
sys___realpathat(struct thread *td, struct __realpathat_args *uap)
{
return (kern___realpathat(td, uap->fd, uap->path, uap->buf, uap->size,
uap->flags, UIO_USERSPACE));
}
/*
* Retrieve the full filesystem path that correspond to a vnode from the name
* cache (if available)
@ -2235,7 +2271,7 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
rdir = fdp->fd_rdir;
vrefact(rdir);
FILEDESC_SUNLOCK(fdp);
error = vn_fullpath1(td, vn, rdir, buf, retbuf, &buflen);
error = vn_fullpath_any(td, vn, rdir, buf, retbuf, &buflen);
vrele(rdir);
if (!error)
@ -2263,7 +2299,7 @@ vn_fullpath_global(struct thread *td, struct vnode *vn,
return (EINVAL);
buflen = MAXPATHLEN;
buf = malloc(buflen, M_TEMP, M_WAITOK);
error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, &buflen);
error = vn_fullpath_any(td, vn, rootvnode, buf, retbuf, &buflen);
if (!error)
*freebuf = buf;
else
@ -2334,40 +2370,40 @@ vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, size_t *buflen)
}
/*
* The magic behind vn_getcwd() and vn_fullpath().
* Resolve a directory to a pathname.
*
* The name of the directory can always be found in the namecache or fetched
* from the filesystem. There is also guaranteed to be only one parent, meaning
* we can just follow vnodes up until we find the root.
*
* The vnode must be referenced.
*/
static int
vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
char *buf, char **retbuf, size_t *len)
vn_fullpath_dir(struct thread *td, struct vnode *vp, struct vnode *rdir,
char *buf, char **retbuf, size_t *len, bool slash_prefixed, size_t addend)
{
int error, slash_prefixed;
#ifdef KDTRACE_HOOKS
struct vnode *startvp = vp;
#endif
struct vnode *vp1;
size_t buflen;
int error;
VNPASS(vp->v_type == VDIR || VN_IS_DOOMED(vp), vp);
VNPASS(vp->v_usecount > 0, vp);
buflen = *len;
buflen--;
buf[buflen] = '\0';
if (!slash_prefixed) {
MPASS(*len >= 2);
buflen--;
buf[buflen] = '\0';
}
error = 0;
slash_prefixed = 0;
SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
counter_u64_add(numfullpathcalls, 1);
vref(vp);
if (vp->v_type != VDIR) {
error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
if (error)
return (error);
if (buflen == 0) {
vrele(vp);
return (ENOMEM);
}
buf[--buflen] = '/';
slash_prefixed = 1;
}
while (vp != rdir && vp != rootvnode) {
/*
* The vp vnode must be already fully constructed,
@ -2420,7 +2456,7 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
break;
}
buf[--buflen] = '/';
slash_prefixed = 1;
slash_prefixed = true;
}
if (error)
return (error);
@ -2437,12 +2473,128 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
counter_u64_add(numfullpathfound, 1);
vrele(vp);
SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
*retbuf = buf + buflen;
SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, *retbuf);
*len -= buflen;
*len += addend;
return (0);
}
/*
* Resolve an arbitrary vnode to a pathname.
*
* Note 2 caveats:
* - hardlinks are not tracked, thus if the vnode is not a directory this can
* resolve to a different path than the one used to find it
* - namecache is not mandatory, meaning names are not guaranteed to be added
* (in which case resolving fails)
*/
/*
 * Resolve a vnode of any type to a pathname.
 *
 * A directory is passed straight to vn_fullpath_dir().  For anything else
 * the tail of buf is first filled via vn_vptocnp() (which also updates vp),
 * a '/' is placed in front of it, and the remainder of the path is resolved
 * by vn_fullpath_dir() starting from the updated vp.
 *
 * *buflen is the buffer size on entry and must be at least 2 (EINVAL
 * otherwise).  ENOMEM is returned when no room is left for the '/'
 * separator.  Other errors come from vn_vptocnp() or vn_fullpath_dir().
 */
static int
vn_fullpath_any(struct thread *td, struct vnode *vp, struct vnode *rdir,
    char *buf, char **retbuf, size_t *buflen)
{
	size_t initial_len, consumed;
	bool have_slash;
	int error;

	if (*buflen < 2)
		return (EINVAL);

	initial_len = *buflen;
	have_slash = false;

	vref(vp);
	if (vp->v_type != VDIR) {
		/* NUL-terminate first; the name is written in front of it. */
		buf[--(*buflen)] = '\0';
		error = vn_vptocnp(&vp, td->td_ucred, buf, buflen);
		/*
		 * NOTE(review): no vrele() on this path -- presumably
		 * vn_vptocnp() drops the reference on failure; confirm.
		 */
		if (error != 0)
			return (error);
		if (*buflen == 0) {
			vrele(vp);
			return (ENOMEM);
		}
		buf[--(*buflen)] = '/';
		have_slash = true;
	}

	/* Bytes already used at the tail of buf, passed on as the addend. */
	consumed = initial_len - *buflen;
	return (vn_fullpath_dir(td, vp, rdir, buf, retbuf, buflen, have_slash,
	    consumed));
}
/*
* Resolve an arbitrary vnode to a pathname (taking care of hardlinks).
*
* Since the namecache does not track hardlinks, the caller is expected to first
* look up the target vnode with SAVENAME | WANTPARENT flags passed to namei.
*
* Then we have 2 cases:
* - if the found vnode is a directory, the path can be constructed just by
* following names up the chain
* - otherwise we populate the buffer with the saved name and start resolving
* from the parent
*/
/*
 * Build the full path for the result of a completed lookup.
 *
 * ndp     - lookup state; ni_vp, ni_dvp and ni_cnd are consulted.
 * retbuf  - filled in by vn_fullpath_dir().
 * freebuf - on success, set to the allocation the caller must release with
 *           free(..., M_TEMP).
 * buflen  - in: buffer size wanted (at least 2, clamped to MAXPATHLEN);
 *           also handed to vn_fullpath_dir(), which updates it.
 *
 * When the looked-up vnode is not a directory, "/<name>" plus the
 * terminating NUL is written at the end of the buffer from the saved
 * component name and resolution continues from the parent directory.
 *
 * Returns 0 on success, EINVAL if *buflen < 2, ENOMEM if the saved name
 * does not fit, or the error from vn_fullpath_dir().  On failure the
 * temporary buffer is freed here.
 */
static int
vn_fullpath_hardlink(struct thread *td, struct nameidata *ndp, char **retbuf,
    char **freebuf, size_t *buflen)
{
	struct componentname *cnp;
	struct filedesc *fdp;
	struct vnode *rdir, *startvp;
	char *pathbuf, *tail;
	size_t tail_len;
	bool have_slash;
	int error;

	if (*buflen < 2)
		return (EINVAL);
	if (*buflen > MAXPATHLEN)
		*buflen = MAXPATHLEN;

	pathbuf = malloc(*buflen, M_TEMP, M_WAITOK);

	/* Take a reference on the process root directory vnode. */
	fdp = td->td_proc->p_fd;
	FILEDESC_SLOCK(fdp);
	rdir = fdp->fd_rdir;
	vrefact(rdir);
	FILEDESC_SUNLOCK(fdp);

	startvp = ndp->ni_vp;
	tail_len = 0;
	have_slash = false;
	if (startvp->v_type != VDIR) {
		cnp = &ndp->ni_cnd;
		/* Room for the leading '/' and the trailing NUL. */
		tail_len = cnp->cn_namelen + 2;
		if (*buflen < tail_len) {
			error = ENOMEM;
			goto fail;
		}
		*buflen -= tail_len;
		tail = pathbuf + *buflen;
		*tail = '/';
		memcpy(tail + 1, cnp->cn_nameptr, cnp->cn_namelen);
		tail[tail_len - 1] = '\0';
		have_slash = true;
		/* Continue resolving from the parent directory. */
		startvp = ndp->ni_dvp;
	}

	vref(startvp);
	error = vn_fullpath_dir(td, startvp, rdir, pathbuf, retbuf, buflen,
	    have_slash, tail_len);
	if (error != 0)
		goto fail;

	vrele(rdir);
	*freebuf = pathbuf;
	return (0);

fail:
	vrele(rdir);
	free(pathbuf, M_TEMP);
	return (error);
}
struct vnode *
vn_dir_dd_ino(struct vnode *vp)
{

View File

@ -830,6 +830,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
case AUE_UNLINK:
case AUE_UNLINKAT:
case AUE_UTIMES:
case AUE_REALPATHAT:
ATFD1_TOKENS(1);
UPATH1_VNODE1_TOKENS;
break;

View File

@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1300079 /* Master, propagated to newvers */
#define __FreeBSD_version 1300080 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

View File

@ -115,6 +115,9 @@ static struct syscall decoded_syscalls[] = {
.args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } },
{ .name = "__getcwd", .ret_type = 1, .nargs = 2,
.args = { { Name | OUT, 0 }, { Int, 1 } } },
{ .name = "__realpathat", .ret_type = 1, .nargs = 5,
.args = { { Atfd, 0 }, { Name | IN, 1 }, { Name | OUT, 2 },
{ Sizet, 3 }, { Int, 4} } },
{ .name = "_umtx_op", .ret_type = 1, .nargs = 5,
.args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 },
{ Ptr, 4 } } },