Break recursion involving getnewvnode and zfs_rmnode.

When we're at our vnode limit, getnewvnode will call into the vnode LRU
cache to free up vnodes. If the vnode we try to recycle is a ZFS vnode, we
eventually end up in zfs_rmnode. If the ZFS vnode we're recycling has
extended attributes, zfs_rmnode will call zfs_zget, which will attempt to
allocate another vnode. If the next vnode we try to recycle is also a ZFS
vnode with extended attributes, we can recurse further. This recursion is
unbounded and can overflow the stack.

To avoid this, restructure zfs_rmnode to simply add the extended attribute
directory's object ID to the unlinked set, which does not require
allocating a vnode. We then schedule a task (via taskqueue(9); see the
sketch below) that calls zfs_unlinked_drain, which does the work of
properly marking the vnodes for unlinking. zfs_unlinked_drain is also
called on mount, so anything left over is cleaned up there.

Reviewed by:	avg, mav
Sponsored by:	iXsystems, Inc.
Differential Revision:	https://reviews.freebsd.org/D15342
Benno Rice 2018-06-07 18:59:32 +00:00
parent 52cf896593
commit b3b11d6400
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=334810
3 changed files with 66 additions and 13 deletions
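
For reference, below is a minimal, standalone sketch of the taskqueue(9)
deferral pattern the commit message refers to. It is illustrative only: the
example_* names are hypothetical, and the commit itself enqueues its task on
the ZFS system_taskq rather than the stock taskqueue_thread used here.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/taskqueue.h>

static struct task example_task;	/* hypothetical name, for illustration */

static void
example_task_fn(void *context, int pending __unused)
{
	/* Deferred work runs here, on a taskqueue thread with its own stack. */
}

static void
example_init(void *ctx)
{
	/* Initialize once; the same task can be re-enqueued any number of times. */
	TASK_INIT(&example_task, 0, example_task_fn, ctx);
}

static void
example_defer(void)
{
	/*
	 * Enqueueing an already-pending task does not queue it again, so
	 * repeated calls before the task runs result in a single execution.
	 */
	taskqueue_enqueue(taskqueue_thread, &example_task);
}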


@ -85,6 +85,9 @@ struct zfsvfs {
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
#define ZFS_OBJ_MTX_SZ 64
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
#if defined(__FreeBSD__)
struct task z_unlinked_drain_task;
#endif
};
/*


@ -281,6 +281,7 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
zap_attribute_t zap;
dmu_object_info_t doi;
znode_t *zp;
dmu_tx_t *tx;
int error;
/*
@ -318,6 +319,19 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY);
zp->z_unlinked = B_TRUE;
#if defined(__FreeBSD__)
/*
* zfs_rmnode no longer zeroes the link count of an extended
* attribute directory that it defers to the unlinked set, so
* make sure it is set to zero here.
*/
zp->z_links = 0;
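/* Persist the zeroed link count with a small standalone transaction. */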
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
&zp->z_links, sizeof (zp->z_links), tx));
dmu_tx_commit(tx);
#endif
vput(ZTOV(zp));
}
zap_cursor_fini(&zc);
@ -393,7 +407,6 @@ zfs_rmnode(znode_t *zp)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
objset_t *os = zfsvfs->z_os;
znode_t *xzp = NULL;
dmu_tx_t *tx;
uint64_t acl_obj;
uint64_t xattr_obj;
@ -443,11 +456,8 @@ zfs_rmnode(znode_t *zp)
*/
error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj, sizeof (xattr_obj));
if (error == 0 && xattr_obj) {
error = zfs_zget(zfsvfs, xattr_obj, &xzp);
ASSERT3S(error, ==, 0);
vn_lock(ZTOV(xzp), LK_EXCLUSIVE | LK_RETRY);
}
if (error)
xattr_obj = 0;
acl_obj = zfs_external_acl(zp);
@ -457,10 +467,8 @@ zfs_rmnode(znode_t *zp)
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
if (xzp) {
if (xattr_obj)
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
}
if (acl_obj)
dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
@ -475,9 +483,25 @@ zfs_rmnode(znode_t *zp)
dmu_tx_abort(tx);
zfs_znode_dmu_fini(zp);
zfs_znode_free(zp);
goto out;
return;
}
#if defined(__FreeBSD__)
/*
* FreeBSD's implementation of zfs_zget requires a vnode to back it.
* This means we could end up calling into getnewvnode while already
* in zfs_rmnode as a result of a prior getnewvnode call trying to
* clear vnodes out of the cache. If this repeats we can recurse
* deeply enough to overflow the stack. To avoid this, skip the
* zfs_zget call on the xattr znode and instead simply add it to the
* unlinked set and schedule a call to zfs_unlinked_drain.
*/
if (xattr_obj) {
/* Add extended attribute directory to the unlinked set. */
VERIFY3U(0, ==,
zap_add_int(os, zfsvfs->z_unlinkedobj, xattr_obj, tx));
}
#else
if (xzp) {
ASSERT(error == 0);
xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
@ -486,6 +510,7 @@ zfs_rmnode(znode_t *zp)
&xzp->z_links, sizeof (xzp->z_links), tx));
zfs_unlinked_add(xzp, tx);
}
#endif
/* Remove this znode from the unlinked set */
VERIFY3U(0, ==,
@ -494,9 +519,16 @@ zfs_rmnode(znode_t *zp)
zfs_znode_delete(zp, tx);
dmu_tx_commit(tx);
out:
if (xzp)
vput(ZTOV(xzp));
if (xattr_obj) {
/*
* We're using the FreeBSD taskqueue API here instead of
* the Solaris taskq API since the FreeBSD API allows for a
* task to be enqueued multiple times but executed once.
*/
taskqueue_enqueue(system_taskq->tq_queue,
&zfsvfs->z_unlinked_drain_task);
}
}
static uint64_t


@ -972,6 +972,15 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
return (0);
}
#if defined(__FreeBSD__)
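/*
* taskqueue(9) callback that drains the unlinked set outside of the
* vnode-reclaim path.
*/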
static void
zfsvfs_task_unlinked_drain(void *context, int pending __unused)
{
zfs_unlinked_drain((zfsvfs_t *)context);
}
#endif
int
zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
{
@ -1023,6 +1032,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
#if defined(__FreeBSD__)
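/*
* Set up the deferred unlinked-drain task; zfs_rmnode enqueues it when
* it adds an extended attribute directory to the unlinked set.
*/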
TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
zfsvfs_task_unlinked_drain, zfsvfs);
#endif
#ifdef DIAGNOSTIC
rrm_init(&zfsvfs->z_teardown_lock, B_TRUE);
#else
@ -2015,6 +2028,11 @@ zfs_umount(vfs_t *vfsp, int fflag)
}
#endif
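/*
* Make sure the deferred unlinked-drain task is neither queued nor
* still running before the zfsvfs is torn down; taskqueue_cancel()
* returns non-zero while the task is executing, so drain and retry
* until it has completed.
*/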
while (taskqueue_cancel(system_taskq->tq_queue,
&zfsvfs->z_unlinked_drain_task, NULL) != 0)
taskqueue_drain(system_taskq->tq_queue,
&zfsvfs->z_unlinked_drain_task);
VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
os = zfsvfs->z_os;