zfs_mount: taste geom providers for root pool config
This should allow a dataset to be mounted as the root filesystem even if it belongs to a pool that is not described in zpool.cache. This adds some overhead to the boot process, though. If the root filesystem's pool is found in zpool.cache, then by default its cached configuration will be used for import. vfs.zfs.rootpool.prefer_cached_config can be set to zero to force the config to be retasted. Discussed with: gibbs, pjd, des MFC after: 25 days
This commit is contained in:
parent
a046473b9c
commit
61e100ee3b
@ -3550,8 +3550,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
return (0);
|
||||
}
|
||||
|
||||
#if defined(sun)
|
||||
#ifdef _KERNEL
|
||||
#if defined(sun)
|
||||
/*
|
||||
* Get the root pool information from the root disk, then import the root pool
|
||||
* during the system boot up time.
|
||||
@ -3753,8 +3753,115 @@ spa_import_rootpool(char *devpath, char *devid)
|
||||
return (error);
|
||||
}
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
extern int
|
||||
vdev_geom_read_pool_label(const char *name, nvlist_t **config);
|
||||
|
||||
static nvlist_t *
|
||||
spa_generate_rootconf(const char *name)
|
||||
{
|
||||
nvlist_t *config;
|
||||
nvlist_t *nvtop, *nvroot;
|
||||
uint64_t pgid;
|
||||
|
||||
if (vdev_geom_read_pool_label(name, &config) != 0)
|
||||
return (NULL);
|
||||
|
||||
/*
|
||||
* Add this top-level vdev to the child array.
|
||||
*/
|
||||
VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvtop) == 0);
|
||||
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
||||
&pgid) == 0);
|
||||
|
||||
/*
|
||||
* Put this pool's top-level vdevs into a root vdev.
|
||||
*/
|
||||
VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
|
||||
VDEV_TYPE_ROOT) == 0);
|
||||
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
|
||||
VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
|
||||
VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&nvtop, 1) == 0);
|
||||
|
||||
/*
|
||||
* Replace the existing vdev_tree with the new root vdev in
|
||||
* this pool's configuration (remove the old, add the new).
|
||||
*/
|
||||
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
|
||||
nvlist_free(nvroot);
|
||||
return (config);
|
||||
}
|
||||
|
||||
int
|
||||
spa_import_rootpool(const char *name)
|
||||
{
|
||||
spa_t *spa;
|
||||
vdev_t *rvd, *bvd, *avd = NULL;
|
||||
nvlist_t *config, *nvtop;
|
||||
uint64_t txg;
|
||||
char *pname;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Read the label from the boot device and generate a configuration.
|
||||
*/
|
||||
config = spa_generate_rootconf(name);
|
||||
if (config == NULL) {
|
||||
cmn_err(CE_NOTE, "Cannot find the pool label for '%s'",
|
||||
name);
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
|
||||
&pname) == 0 && strcmp(name, pname) == 0);
|
||||
VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
|
||||
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
if ((spa = spa_lookup(pname)) != NULL) {
|
||||
/*
|
||||
* Remove the existing root pool from the namespace so that we
|
||||
* can replace it with the correct config we just read in.
|
||||
*/
|
||||
spa_remove(spa);
|
||||
}
|
||||
spa = spa_add(pname, config, NULL);
|
||||
spa->spa_is_root = B_TRUE;
|
||||
spa->spa_import_flags = ZFS_IMPORT_VERBATIM;
|
||||
|
||||
/*
|
||||
* Build up a vdev tree based on the boot device's label config.
|
||||
*/
|
||||
VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvtop) == 0);
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
|
||||
VDEV_ALLOC_ROOTPOOL);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
if (error) {
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
nvlist_free(config);
|
||||
cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
|
||||
pname);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = 0;
|
||||
spa_history_log_version(spa, LOG_POOL_IMPORT);
|
||||
out:
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
vdev_free(rvd);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
#endif /* sun */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Import a non-root pool into the system.
|
||||
|
@ -419,7 +419,11 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
|
||||
size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str, nvlist_t *zplprops);
|
||||
#if defined(sun)
|
||||
extern int spa_import_rootpool(char *devpath, char *devid);
|
||||
#else
|
||||
extern int spa_import_rootpool(const char *name);
|
||||
#endif
|
||||
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
uint64_t flags);
|
||||
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
|
||||
|
@ -178,17 +178,11 @@ vdev_geom_detach(void *arg, int flag __unused)
|
||||
static uint64_t
|
||||
nvlist_get_guid(nvlist_t *list)
|
||||
{
|
||||
nvpair_t *elem = NULL;
|
||||
uint64_t value;
|
||||
|
||||
while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
|
||||
if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
|
||||
strcmp(nvpair_name(elem), "guid") == 0) {
|
||||
VERIFY(nvpair_value_uint64(elem, &value) == 0);
|
||||
return (value);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
value = 0;
|
||||
nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, &value);
|
||||
return (value);
|
||||
}
|
||||
|
||||
static int
|
||||
@ -226,8 +220,16 @@ vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t siz
|
||||
return (error);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vdev_geom_read_guid(struct g_consumer *cp)
|
||||
static void
|
||||
vdev_geom_taste_orphan(struct g_consumer *cp)
|
||||
{
|
||||
|
||||
KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
|
||||
cp->provider->name));
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
|
||||
{
|
||||
struct g_provider *pp;
|
||||
vdev_label_t *label;
|
||||
@ -235,13 +237,13 @@ vdev_geom_read_guid(struct g_consumer *cp)
|
||||
size_t buflen;
|
||||
uint64_t psize;
|
||||
off_t offset, size;
|
||||
uint64_t guid;
|
||||
uint64_t guid, state, txg;
|
||||
int error, l, len;
|
||||
|
||||
g_topology_assert_not();
|
||||
|
||||
pp = cp->provider;
|
||||
ZFS_LOG(1, "Reading guid from %s...", pp->name);
|
||||
ZFS_LOG(1, "Reading config from %s...", pp->name);
|
||||
|
||||
psize = pp->mediasize;
|
||||
psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
|
||||
@ -253,8 +255,8 @@ vdev_geom_read_guid(struct g_consumer *cp)
|
||||
label = kmem_alloc(size, KM_SLEEP);
|
||||
buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
|
||||
|
||||
*config = NULL;
|
||||
for (l = 0; l < VDEV_LABELS; l++) {
|
||||
nvlist_t *config = NULL;
|
||||
|
||||
offset = vdev_label_offset(psize, l, 0);
|
||||
if ((offset % pp->sectorsize) != 0)
|
||||
@ -264,27 +266,149 @@ vdev_geom_read_guid(struct g_consumer *cp)
|
||||
continue;
|
||||
buf = label->vl_vdev_phys.vp_nvlist;
|
||||
|
||||
if (nvlist_unpack(buf, buflen, &config, 0) != 0)
|
||||
if (nvlist_unpack(buf, buflen, config, 0) != 0)
|
||||
continue;
|
||||
|
||||
guid = nvlist_get_guid(config);
|
||||
nvlist_free(config);
|
||||
if (guid != 0)
|
||||
break;
|
||||
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
|
||||
&state) != 0 || state >= POOL_STATE_DESTROYED) {
|
||||
nvlist_free(*config);
|
||||
*config = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
|
||||
&txg) != 0 || txg == 0) {
|
||||
nvlist_free(*config);
|
||||
*config = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
kmem_free(label, size);
|
||||
if (guid != 0)
|
||||
ZFS_LOG(1, "guid for %s is %ju", pp->name, (uintmax_t)guid);
|
||||
return (guid);
|
||||
return (*config == NULL ? ENOENT : 0);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_geom_check_config(nvlist_t *config, const char *name, uint64_t *best_txg)
|
||||
{
|
||||
uint64_t vdev_guid;
|
||||
uint64_t txg;
|
||||
char *pname;
|
||||
|
||||
if (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
|
||||
strcmp(pname, name) != 0)
|
||||
return (ENOENT);
|
||||
|
||||
ZFS_LOG(1, "found pool: %s", pname);
|
||||
|
||||
txg = 0;
|
||||
nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg);
|
||||
if (txg <= *best_txg)
|
||||
return (ENOENT);
|
||||
*best_txg = txg;
|
||||
ZFS_LOG(1, "txg: %ju", (uintmax_t)*best_txg);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (pp->flags & G_PF_WITHER)
|
||||
return (EINVAL);
|
||||
if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
|
||||
return (EINVAL);
|
||||
g_attach(cp, pp);
|
||||
error = g_access(cp, 1, 0, 0);
|
||||
if (error != 0)
|
||||
g_detach(cp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_geom_taste_orphan(struct g_consumer *cp)
|
||||
vdev_geom_dettach_taster(struct g_consumer *cp)
|
||||
{
|
||||
g_access(cp, -1, 0, 0);
|
||||
g_detach(cp);
|
||||
}
|
||||
|
||||
KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
|
||||
cp->provider->name));
|
||||
int
|
||||
vdev_geom_read_pool_label(const char *name, nvlist_t **config)
|
||||
{
|
||||
struct g_class *mp;
|
||||
struct g_geom *gp, *zgp;
|
||||
struct g_provider *pp;
|
||||
struct g_consumer *zcp;
|
||||
nvlist_t *vdev_cfg;
|
||||
uint64_t best_txg;
|
||||
int error;
|
||||
|
||||
DROP_GIANT();
|
||||
g_topology_lock();
|
||||
|
||||
zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
|
||||
/* This orphan function should be never called. */
|
||||
zgp->orphan = vdev_geom_taste_orphan;
|
||||
zcp = g_new_consumer(zgp);
|
||||
|
||||
best_txg = 0;
|
||||
*config = NULL;
|
||||
LIST_FOREACH(mp, &g_classes, class) {
|
||||
if (mp == &zfs_vdev_class)
|
||||
continue;
|
||||
LIST_FOREACH(gp, &mp->geom, geom) {
|
||||
if (gp->flags & G_GEOM_WITHER)
|
||||
continue;
|
||||
LIST_FOREACH(pp, &gp->provider, provider) {
|
||||
if (pp->flags & G_PF_WITHER)
|
||||
continue;
|
||||
if (vdev_geom_attach_taster(zcp, pp) != 0)
|
||||
continue;
|
||||
g_topology_unlock();
|
||||
error = vdev_geom_read_config(zcp, &vdev_cfg);
|
||||
g_topology_lock();
|
||||
vdev_geom_dettach_taster(zcp);
|
||||
if (error)
|
||||
continue;
|
||||
ZFS_LOG(1, "successfully read vdev config");
|
||||
|
||||
error = vdev_geom_check_config(vdev_cfg, name,
|
||||
&best_txg);
|
||||
if (error != 0) {
|
||||
nvlist_free(vdev_cfg);
|
||||
continue;
|
||||
}
|
||||
nvlist_free(*config);
|
||||
*config = vdev_cfg;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g_destroy_consumer(zcp);
|
||||
g_destroy_geom(zgp);
|
||||
g_topology_unlock();
|
||||
PICKUP_GIANT();
|
||||
return (*config == NULL ? ENOENT : 0);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vdev_geom_read_guid(struct g_consumer *cp)
|
||||
{
|
||||
nvlist_t *config;
|
||||
uint64_t guid;
|
||||
|
||||
g_topology_assert_not();
|
||||
|
||||
guid = 0;
|
||||
if (vdev_geom_read_config(cp, &config) == 0) {
|
||||
guid = nvlist_get_guid(config);
|
||||
nvlist_free(config);
|
||||
}
|
||||
return (guid);
|
||||
}
|
||||
|
||||
static struct g_consumer *
|
||||
@ -311,18 +435,12 @@ vdev_geom_attach_by_guid(uint64_t guid)
|
||||
if (gp->flags & G_GEOM_WITHER)
|
||||
continue;
|
||||
LIST_FOREACH(pp, &gp->provider, provider) {
|
||||
if (pp->flags & G_PF_WITHER)
|
||||
if (vdev_geom_attach_taster(zcp, pp) != 0)
|
||||
continue;
|
||||
g_attach(zcp, pp);
|
||||
if (g_access(zcp, 1, 0, 0) != 0) {
|
||||
g_detach(zcp);
|
||||
continue;
|
||||
}
|
||||
g_topology_unlock();
|
||||
pguid = vdev_geom_read_guid(zcp);
|
||||
g_topology_lock();
|
||||
g_access(zcp, -1, 0, 0);
|
||||
g_detach(zcp);
|
||||
vdev_geom_dettach_taster(zcp);
|
||||
if (pguid != guid)
|
||||
continue;
|
||||
cp = vdev_geom_attach(pp);
|
||||
|
@ -1539,6 +1539,25 @@ zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
|
||||
}
|
||||
#endif /* OPENSOLARIS_MOUNTROOT */
|
||||
|
||||
/*
 * Copy the pool component of dataset name 'osname' -- everything up to,
 * but not including, the first '/' (or the whole string when there is no
 * '/') -- into 'poolname' as a NUL-terminated string.
 *
 * Returns 0 on success.  If the pool component is MAXNAMELEN characters or
 * longer, nothing is written and ENAMETOOLONG is returned.  The caller must
 * supply a buffer of at least MAXNAMELEN bytes.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	const char *slash;
	size_t len;

	slash = strchr(osname, '/');
	if (slash != NULL)
		len = (size_t)(slash - osname);
	else
		len = strlen(osname);

	if (len >= MAXNAMELEN)
		return (ENAMETOOLONG);

	(void) memcpy(poolname, osname, len);
	poolname[len] = '\0';
	return (0);
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
zfs_mount(vfs_t *vfsp)
|
||||
@ -1632,6 +1651,29 @@ zfs_mount(vfs_t *vfsp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Initial root mount: try hard to import the requested root pool. */
|
||||
if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
|
||||
(vfsp->vfs_flag & MNT_UPDATE) == 0) {
|
||||
char pname[MAXNAMELEN];
|
||||
spa_t *spa;
|
||||
int prefer_cache;
|
||||
|
||||
error = getpoolname(osname, pname);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
prefer_cache = 1;
|
||||
TUNABLE_INT_FETCH("vfs.zfs.rootpool.prefer_cached_config",
|
||||
&prefer_cache);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa = spa_lookup(pname);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
if (!prefer_cache || spa == NULL) {
|
||||
error = spa_import_rootpool(pname);
|
||||
if (error)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
DROP_GIANT();
|
||||
error = zfs_domount(vfsp, osname);
|
||||
PICKUP_GIANT();
|
||||
|
Loading…
Reference in New Issue
Block a user