MFV r251626:

ZFS event processing should work on R/O root filesystems

Illumos ZFS issues:
  3749 zfs event processing should work on R/O root filesystems

MFC after:      2 weeks
Author: Xin LI  2013-06-11 19:35:44 +00:00
Commit: ed8fd1989f
Notes (svn2git, 2020-12-20 02:59:44 +00:00):
    svn path=/head/; revision=251636
4 changed files with 71 additions and 12 deletions
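In short, spa_async_dispatch() no longer refuses to start the async thread when the root filesystem is read-only. Instead, spa_config_write() now reports failures, spa_config_sync() posts an FM_EREPORT_ZFS_CONFIG_CACHE_WRITE ereport and records the failure time, and the new spa_async_tasks_pending() allows the config-cache update to be retried only after zfs_ccw_retry_interval (300 seconds) has elapsed. The standalone C sketch below only illustrates that retry gate; the helper name ccw_retry_due() and the hrtime_t/NANOSEC stand-ins are assumptions made for this note, and the authoritative logic is spa_async_tasks_pending() in the spa.c hunk that follows.

#include <stdint.h>

/* Stand-ins for the kernel's hrtime_t and NANOSEC (assumed for this sketch). */
typedef int64_t hrtime_t;
#define	NANOSEC	1000000000LL

/*
 * Hypothetical helper mirroring the gate in spa_async_tasks_pending():
 * returns nonzero when a pending config-cache update may be retried.
 */
static int
ccw_retry_due(hrtime_t now, hrtime_t last_fail_time, int retry_interval_sec)
{
	/* No recorded write failure: the config task is not rate limited. */
	if (last_fail_time == 0)
		return (1);
	/* Otherwise retry only once the full interval has elapsed. */
	return (now - last_fail_time >= (hrtime_t)retry_interval_sec * NANOSEC);
}

With the default interval, a cache-file write failure at time T suspends only the SPA_ASYNC_CONFIG_UPDATE task until T + 300 s; all other async tasks continue to be dispatched.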

spa.c

@@ -88,6 +88,12 @@ TUNABLE_INT("vfs.zfs.check_hostid", &check_hostid);
 SYSCTL_INT(_vfs_zfs, OID_AUTO, check_hostid, CTLFLAG_RW, &check_hostid, 0,
     "Check hostid on import?");
 
+/*
+ * The interval, in seconds, at which failed configuration cache file writes
+ * should be retried.
+ */
+static int zfs_ccw_retry_interval = 300;
+
 typedef enum zti_modes {
 	zti_mode_fixed,			/* value is # of threads (min 1) */
 	zti_mode_online_percent,	/* value is % of online CPUs */
@@ -5852,13 +5858,34 @@ spa_async_resume(spa_t *spa)
 	mutex_exit(&spa->spa_async_lock);
 }
 
+static boolean_t
+spa_async_tasks_pending(spa_t *spa)
+{
+	uint_t non_config_tasks;
+	uint_t config_task;
+	boolean_t config_task_suspended;
+
+	non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
+	config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
+	if (spa->spa_ccw_fail_time == 0) {
+		config_task_suspended = B_FALSE;
+	} else {
+		config_task_suspended =
+		    (gethrtime() - spa->spa_ccw_fail_time) <
+		    (zfs_ccw_retry_interval * NANOSEC);
+	}
+
+	return (non_config_tasks || (config_task && !config_task_suspended));
+}
+
 static void
 spa_async_dispatch(spa_t *spa)
 {
 	mutex_enter(&spa->spa_async_lock);
-	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
+	if (spa_async_tasks_pending(spa) &&
+	    !spa->spa_async_suspended &&
 	    spa->spa_async_thread == NULL &&
-	    rootdir != NULL && !vn_is_readonly(rootdir))
+	    rootdir != NULL)
 		spa->spa_async_thread = thread_create(NULL, 0,
 		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
 	mutex_exit(&spa->spa_async_lock);

spa_config.c

@@ -27,6 +27,7 @@
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
 #include <sys/fm/fs/zfs.h>
+#include <sys/spa_impl.h>
 #include <sys/nvpair.h>
 #include <sys/uio.h>
@@ -139,7 +140,7 @@ spa_config_load(void)
 	kobj_close_file(file);
 }
 
-static void
+static int
 spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 {
 	size_t buflen;
@@ -147,13 +148,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 	vnode_t *vp;
 	int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
 	char *temp;
+	int err;
 
 	/*
 	 * If the nvlist is empty (NULL), then remove the old cachefile.
 	 */
 	if (nvl == NULL) {
-		(void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
-		return;
+		err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+		return (err);
 	}
 
 	/*
@@ -174,12 +176,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 	 */
 	(void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
 
-	if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) {
-		if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
-		    0, RLIM64_INFINITY, kcred, NULL) == 0 &&
-		    VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) {
-			(void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
-		}
+	err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
+	if (err == 0) {
+		err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
+		    0, RLIM64_INFINITY, kcred, NULL);
+		if (err == 0)
+			err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+		if (err == 0)
+			err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
 
 		(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
 	}
@@ -187,6 +191,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 	kmem_free(buf, buflen);
 	kmem_free(temp, MAXPATHLEN);
+	return (err);
 }
 
 /*
@@ -198,6 +203,8 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
 {
 	spa_config_dirent_t *dp, *tdp;
 	nvlist_t *nvl;
+	boolean_t ccw_failure;
+	int error;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -209,6 +216,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
 	 * cachefile is changed, the new one is pushed onto this list, allowing
 	 * us to update previous cachefiles that no longer contain this pool.
 	 */
+	ccw_failure = B_FALSE;
 	for (dp = list_head(&target->spa_config_list); dp != NULL;
 	    dp = list_next(&target->spa_config_list, dp)) {
 		spa_t *spa = NULL;
@@ -249,10 +257,32 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
 			mutex_exit(&spa->spa_props_lock);
 		}
 
-		spa_config_write(dp, nvl);
+		error = spa_config_write(dp, nvl);
+		if (error != 0)
+			ccw_failure = B_TRUE;
 		nvlist_free(nvl);
 	}
 
+	if (ccw_failure) {
+		/*
+		 * Keep trying so that configuration data is
+		 * written if/when any temporary filesystem
+		 * resource issues are resolved.
+		 */
+		if (target->spa_ccw_fail_time == 0) {
+			zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+			    target, NULL, NULL, 0, 0);
+		}
+		target->spa_ccw_fail_time = gethrtime();
+		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
+	} else {
+		/*
+		 * Do not rate limit future attempts to update
+		 * the config cache.
+		 */
+		target->spa_ccw_fail_time = 0;
+	}
+
 	/*
 	 * Remove any config entries older than the current one.
 	 */

sys/spa_impl.h

@@ -241,6 +241,7 @@ struct spa {
 	uint64_t	spa_deadman_calls;	/* number of deadman calls */
 	uint64_t	spa_sync_starttime;	/* starting time fo spa_sync */
 	uint64_t	spa_deadman_synctime;	/* deadman expiration timer */
+	hrtime_t	spa_ccw_fail_time;	/* Conf cache write fail time */
 	/*
 	 * spa_refcount & spa_config_lock must be the last elements
 	 * because refcount_t changes size based on compilation options.

sys/fm/fs/zfs.h

@@ -46,6 +46,7 @@ extern "C" {
 #define	FM_EREPORT_ZFS_IO_FAILURE	"io_failure"
 #define	FM_EREPORT_ZFS_PROBE_FAILURE	"probe_failure"
 #define	FM_EREPORT_ZFS_LOG_REPLAY	"log_replay"
+#define	FM_EREPORT_ZFS_CONFIG_CACHE_WRITE	"config_cache_write"
 
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL	"pool"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"