cache: add symlink support to lockless lookup

Reviewed by:	kib (previous version)
Tested by:	pho (previous version)
Differential Revision:	https://reviews.freebsd.org/D27488
This commit is contained in:
Mateusz Guzik 2021-01-23 13:40:48 +00:00
parent bfd75d4557
commit 739ecbcf1c
8 changed files with 228 additions and 38 deletions

View File

@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = {
.vop_unset_text = dead_unset_text,
.vop_write = dead_write,
.vop_fplookup_vexec = VOP_EOPNOTSUPP,
.vop_fplookup_symlink = VOP_EAGAIN,
};
VFS_VOP_VECTOR_REGISTER(dead_vnodeops);

View File

@ -562,6 +562,36 @@ static uma_zone_t __read_mostly cache_zone_small_ts;
static uma_zone_t __read_mostly cache_zone_large;
static uma_zone_t __read_mostly cache_zone_large_ts;
char *
cache_symlink_alloc(size_t size, int flags)
{
if (size < CACHE_ZONE_SMALL_SIZE) {
return (uma_zalloc_smr(cache_zone_small, flags));
}
if (size < CACHE_ZONE_LARGE_SIZE) {
return (uma_zalloc_smr(cache_zone_large, flags));
}
return (NULL);
}
void
cache_symlink_free(char *string, size_t size)
{
MPASS(string != NULL);
if (size < CACHE_ZONE_SMALL_SIZE) {
uma_zfree_smr(cache_zone_small, string);
return;
}
if (size < CACHE_ZONE_LARGE_SIZE) {
uma_zfree_smr(cache_zone_large, string);
return;
}
__assert_unreachable();
}
static struct namecache *
cache_alloc_uma(int len, bool ts)
{
@ -3584,6 +3614,7 @@ cache_fast_lookup_enabled_recalc(void)
#ifdef MAC
mac_on = mac_vnode_check_lookup_enabled();
mac_on |= mac_vnode_check_readlink_enabled();
#else
mac_on = 0;
#endif
@ -3615,6 +3646,7 @@ SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MP
* need restoring in case fast path lookup fails.
*/
struct nameidata_outer {
size_t ni_pathlen;
int cn_flags;
};
@ -3656,8 +3688,10 @@ static bool cache_fplookup_is_mp(struct cache_fpl *fpl);
static int cache_fplookup_cross_mount(struct cache_fpl *fpl);
static int cache_fplookup_partial_setup(struct cache_fpl *fpl);
static int cache_fplookup_skip_slashes(struct cache_fpl *fpl);
static int cache_fplookup_preparse(struct cache_fpl *fpl);
static void cache_fpl_pathlen_dec(struct cache_fpl *fpl);
static void cache_fpl_pathlen_inc(struct cache_fpl *fpl);
static void cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n);
static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n);
static void
@ -3698,6 +3732,7 @@ static void
cache_fpl_checkpoint_outer(struct cache_fpl *fpl)
{
fpl->snd_outer.ni_pathlen = fpl->ndp->ni_pathlen;
fpl->snd_outer.cn_flags = fpl->ndp->ni_cnd.cn_flags;
}
@ -3731,6 +3766,7 @@ cache_fpl_restore_abort(struct cache_fpl *fpl)
*/
fpl->ndp->ni_resflags = 0;
fpl->ndp->ni_cnd.cn_nameptr = fpl->ndp->ni_cnd.cn_pnbuf;
fpl->ndp->ni_pathlen = fpl->snd_outer.ni_pathlen;
}
#ifdef INVARIANTS
@ -3752,6 +3788,7 @@ cache_fpl_assert_status(struct cache_fpl *fpl)
case CACHE_FPL_STATUS_UNSET:
__assert_unreachable();
break;
case CACHE_FPL_STATUS_DESTROYED:
case CACHE_FPL_STATUS_ABORTED:
case CACHE_FPL_STATUS_PARTIAL:
case CACHE_FPL_STATUS_HANDLED:
@ -3804,6 +3841,11 @@ cache_fpl_aborted_early_impl(struct cache_fpl *fpl, int line)
static int __noinline
cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
{
struct nameidata *ndp;
struct componentname *cnp;
ndp = fpl->ndp;
cnp = fpl->cnp;
if (fpl->status != CACHE_FPL_STATUS_UNSET) {
KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL,
@ -3815,6 +3857,14 @@ cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
if (fpl->in_smr)
cache_fpl_smr_exit(fpl);
cache_fpl_restore_abort(fpl);
/*
* Resolving symlinks overwrites data passed by the caller.
* Let namei know.
*/
if (ndp->ni_loopcnt > 0) {
fpl->status = CACHE_FPL_STATUS_DESTROYED;
cache_fpl_cleanup_cnp(cnp);
}
return (CACHE_FPL_FAILED);
}
@ -3955,13 +4005,6 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
return (0);
}
static bool
cache_fplookup_vnode_supported(struct vnode *vp)
{
return (vp->v_type != VLNK);
}
static int __noinline
cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
uint32_t hash)
@ -4242,8 +4285,7 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl)
* Since we expect this to be the terminal vnode it should
* almost never be true.
*/
if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
cache_fplookup_is_mp(fpl))) {
if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
vput(dvp);
vput(tvp);
return (cache_fpl_aborted(fpl));
@ -4546,8 +4588,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl)
return (cache_fpl_handled(fpl));
}
if (__predict_false(!cache_fplookup_vnode_supported(tvp) ||
cache_fplookup_is_mp(fpl))) {
if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) {
vput(dvp);
vput(tvp);
return (cache_fpl_aborted(fpl));
@ -4688,6 +4729,102 @@ cache_fplookup_neg(struct cache_fpl *fpl, struct namecache *ncp, uint32_t hash)
return (cache_fpl_handled_error(fpl, ENOENT));
}
/*
* Resolve a symlink. Called by filesystem-specific routines.
*
* Code flow is:
* ... -> cache_fplookup_symlink -> VOP_FPLOOKUP_SYMLINK -> cache_symlink_resolve
*/
int
cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len)
{
struct nameidata *ndp;
struct componentname *cnp;
ndp = fpl->ndp;
cnp = fpl->cnp;
if (__predict_false(len == 0)) {
return (ENOENT);
}
ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr - cnp->cn_namelen + 1;
#ifdef INVARIANTS
if (ndp->ni_pathlen != fpl->debug.ni_pathlen) {
panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
__func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
}
#endif
if (__predict_false(len + ndp->ni_pathlen > MAXPATHLEN)) {
return (ENAMETOOLONG);
}
if (__predict_false(ndp->ni_loopcnt++ >= MAXSYMLINKS)) {
return (ELOOP);
}
if (ndp->ni_pathlen > 1) {
bcopy(ndp->ni_next, cnp->cn_pnbuf + len, ndp->ni_pathlen);
} else {
cnp->cn_pnbuf[len] = '\0';
}
bcopy(string, cnp->cn_pnbuf, len);
ndp->ni_pathlen += len;
cache_fpl_pathlen_add(fpl, len);
cnp->cn_nameptr = cnp->cn_pnbuf;
fpl->nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
return (0);
}
static int __noinline
cache_fplookup_symlink(struct cache_fpl *fpl)
{
struct nameidata *ndp;
struct componentname *cnp;
struct vnode *dvp, *tvp;
int error;
ndp = fpl->ndp;
cnp = fpl->cnp;
dvp = fpl->dvp;
tvp = fpl->tvp;
if (cache_fpl_islastcn(ndp)) {
if ((cnp->cn_flags & FOLLOW) == 0) {
return (cache_fplookup_final(fpl));
}
}
error = VOP_FPLOOKUP_SYMLINK(tvp, fpl);
if (__predict_false(error != 0)) {
switch (error) {
case EAGAIN:
return (cache_fpl_partial(fpl));
case ENOENT:
case ENAMETOOLONG:
case ELOOP:
cache_fpl_smr_exit(fpl);
return (cache_fpl_handled_error(fpl, error));
default:
return (cache_fpl_aborted(fpl));
}
}
if (*(cnp->cn_nameptr) == '/') {
fpl->dvp = cache_fpl_handle_root(fpl);
fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp);
if (seqc_in_modify(fpl->dvp_seqc)) {
return (cache_fpl_aborted(fpl));
}
}
return (cache_fplookup_preparse(fpl));
}
static int
cache_fplookup_next(struct cache_fpl *fpl)
{
@ -4743,10 +4880,6 @@ cache_fplookup_next(struct cache_fpl *fpl)
return (cache_fpl_partial(fpl));
}
if (!cache_fplookup_vnode_supported(tvp)) {
return (cache_fpl_partial(fpl));
}
counter_u64_add(numposhits, 1);
SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp);
@ -4931,7 +5064,16 @@ static void
cache_fpl_pathlen_inc(struct cache_fpl *fpl)
{
fpl->debug.ni_pathlen++;
cache_fpl_pathlen_add(fpl, 1);
}
static void
cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
{
fpl->debug.ni_pathlen += n;
KASSERT(fpl->debug.ni_pathlen <= PATH_MAX,
("%s: pathlen overflow to %zd\n", __func__, fpl->debug.ni_pathlen));
}
static void
@ -4953,13 +5095,18 @@ cache_fpl_pathlen_inc(struct cache_fpl *fpl)
{
}
static void
cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
{
}
static void
cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n)
{
}
#endif
static int
static int __always_inline
cache_fplookup_preparse(struct cache_fpl *fpl)
{
struct componentname *cnp;
@ -5238,8 +5385,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
return (cache_fpl_aborted(fpl));
}
VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
error = cache_fplookup_preparse(fpl);
if (__predict_false(cache_fpl_terminated(fpl))) {
return (error);
@ -5251,8 +5396,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
break;
}
VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);
error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred);
if (__predict_false(error != 0)) {
error = cache_fplookup_failed_vexec(fpl, error);
@ -5266,20 +5409,27 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
if (cache_fpl_islastcn(ndp)) {
error = cache_fplookup_final(fpl);
break;
if (fpl->tvp->v_type == VLNK) {
error = cache_fplookup_symlink(fpl);
if (cache_fpl_terminated(fpl)) {
break;
}
} else {
if (cache_fpl_islastcn(ndp)) {
error = cache_fplookup_final(fpl);
break;
}
if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
error = cache_fpl_aborted(fpl);
break;
}
fpl->dvp = fpl->tvp;
fpl->dvp_seqc = fpl->tvp_seqc;
cache_fplookup_parse_advance(fpl);
}
if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
error = cache_fpl_aborted(fpl);
break;
}
fpl->dvp = fpl->tvp;
fpl->dvp_seqc = fpl->tvp_seqc;
cache_fplookup_parse_advance(fpl);
cache_fpl_checkpoint(fpl);
}

View File

@ -578,6 +578,7 @@ namei(struct nameidata *ndp)
ndp->ni_startdir->v_type == VBAD);
ndp->ni_lcf = 0;
ndp->ni_loopcnt = 0;
ndp->ni_vp = NULL;
error = namei_getpath(ndp);
@ -611,8 +612,16 @@ namei(struct nameidata *ndp)
TAILQ_INIT(&ndp->ni_cap_tracker);
dp = ndp->ni_startdir;
break;
case CACHE_FPL_STATUS_DESTROYED:
ndp->ni_loopcnt = 0;
error = namei_getpath(ndp);
if (__predict_false(error != 0)) {
return (error);
}
/* FALLTHROUGH */
case CACHE_FPL_STATUS_ABORTED:
TAILQ_INIT(&ndp->ni_cap_tracker);
MPASS(ndp->ni_lcf == 0);
error = namei_setup(ndp, &dp, &pwd);
if (error != 0) {
namei_cleanup_cnp(cnp);
@ -621,8 +630,6 @@ namei(struct nameidata *ndp)
break;
}
ndp->ni_loopcnt = 0;
/*
* Locked lookup.
*/

View File

@ -5452,6 +5452,19 @@ vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused)
VFS_SMR_ASSERT_ENTERED();
}
void
vop_fplookup_symlink_debugpre(void *ap __unused)
{
VFS_SMR_ASSERT_ENTERED();
}
void
vop_fplookup_symlink_debugpost(void *ap __unused, int rc __unused)
{
VFS_SMR_ASSERT_ENTERED();
}
void
vop_strategy_debugpre(void *ap)
{

View File

@ -156,6 +156,16 @@ vop_fplookup_vexec {
};
%% fplookup_symlink vp - - -
%! fplookup_symlink debugpre vop_fplookup_symlink_debugpre
%! fplookup_symlink debugpost vop_fplookup_symlink_debugpost
vop_fplookup_symlink {
IN struct vnode *vp;
IN struct cache_fpl *fpl;
};
%% access vp L L L
vop_access {

View File

@ -116,8 +116,8 @@ struct nameidata {
#ifdef _KERNEL
enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL,
CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED,
CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
struct pwd **pwdp);

View File

@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1400000 /* Master, propagated to newvers */
#define __FreeBSD_version 1400001 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

View File

@ -66,6 +66,7 @@ enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT };
*/
struct namecache;
struct cache_fpl;
struct vpollinfo {
struct mtx vpi_lock; /* lock to protect below */
@ -646,6 +647,10 @@ void cache_purge(struct vnode *vp);
void cache_purge_vgone(struct vnode *vp);
void cache_purge_negative(struct vnode *vp);
void cache_purgevfs(struct mount *mp);
char *cache_symlink_alloc(size_t size, int flags);
void cache_symlink_free(char *string, size_t size);
int cache_symlink_resolve(struct cache_fpl *fpl, const char *string,
size_t len);
void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp);
@ -900,6 +905,8 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
#ifdef DEBUG_VFS_LOCKS
void vop_fplookup_vexec_debugpre(void *a);
void vop_fplookup_vexec_debugpost(void *a, int rc);
void vop_fplookup_symlink_debugpre(void *a);
void vop_fplookup_symlink_debugpost(void *a, int rc);
void vop_strategy_debugpre(void *a);
void vop_lock_debugpre(void *a);
void vop_lock_debugpost(void *a, int rc);
@ -910,6 +917,8 @@ void vop_mkdir_debugpost(void *a, int rc);
#else
#define vop_fplookup_vexec_debugpre(x) do { } while (0)
#define vop_fplookup_vexec_debugpost(x, y) do { } while (0)
#define vop_fplookup_symlink_debugpre(x) do { } while (0)
#define vop_fplookup_symlink_debugpost(x, y) do { } while (0)
#define vop_strategy_debugpre(x) do { } while (0)
#define vop_lock_debugpre(x) do { } while (0)
#define vop_lock_debugpost(x, y) do { } while (0)