loader: factor out label and uberblock load from vdev_probe, add MMP checks
Clean up the label read.
This commit is contained in:
parent
371d37d89f
commit
79a4bf8975
@ -1548,27 +1548,164 @@ vdev_label_offset(uint64_t psize, int l, uint64_t offset)
|
|||||||
return (offset + l * sizeof (vdev_label_t) + label_offset);
|
return (offset + l * sizeof (vdev_label_t) + label_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
|
||||||
|
{
|
||||||
|
unsigned int seq1 = 0;
|
||||||
|
unsigned int seq2 = 0;
|
||||||
|
int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
|
||||||
|
|
||||||
|
if (cmp != 0)
|
||||||
|
return (cmp);
|
||||||
|
|
||||||
|
cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
|
||||||
|
if (cmp != 0)
|
||||||
|
return (cmp);
|
||||||
|
|
||||||
|
if (MMP_VALID(ub1) && MMP_SEQ_VALID(ub1))
|
||||||
|
seq1 = MMP_SEQ(ub1);
|
||||||
|
|
||||||
|
if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2))
|
||||||
|
seq2 = MMP_SEQ(ub2);
|
||||||
|
|
||||||
|
return (AVL_CMP(seq1, seq2));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
uberblock_verify(uberblock_t *ub)
|
||||||
|
{
|
||||||
|
if (ub->ub_magic == BSWAP_64((uint64_t)UBERBLOCK_MAGIC)) {
|
||||||
|
byteswap_uint64_array(ub, sizeof (uberblock_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ub->ub_magic != UBERBLOCK_MAGIC ||
|
||||||
|
!SPA_VERSION_IS_SUPPORTED(ub->ub_version))
|
||||||
|
return (EINVAL);
|
||||||
|
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset,
|
||||||
|
size_t size)
|
||||||
|
{
|
||||||
|
blkptr_t bp;
|
||||||
|
off_t off;
|
||||||
|
|
||||||
|
off = vdev_label_offset(vd->v_psize, l, offset);
|
||||||
|
|
||||||
|
BP_ZERO(&bp);
|
||||||
|
BP_SET_LSIZE(&bp, size);
|
||||||
|
BP_SET_PSIZE(&bp, size);
|
||||||
|
BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
|
||||||
|
BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
|
||||||
|
DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
|
||||||
|
ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
|
||||||
|
|
||||||
|
return (vdev_read_phys(vd, &bp, buf, off, size));
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned char *
|
||||||
|
vdev_label_read_config(vdev_t *vd, uint64_t txg)
|
||||||
|
{
|
||||||
|
vdev_phys_t *label;
|
||||||
|
uint64_t best_txg = 0;
|
||||||
|
uint64_t label_txg = 0;
|
||||||
|
uint64_t asize;
|
||||||
|
unsigned char *nvl;
|
||||||
|
size_t nvl_size;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
label = malloc(sizeof (vdev_phys_t));
|
||||||
|
if (label == NULL)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
nvl_size = VDEV_PHYS_SIZE - sizeof (zio_eck_t) - 4;
|
||||||
|
nvl = malloc(nvl_size);
|
||||||
|
if (nvl == NULL) {
|
||||||
|
free(label);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||||
|
const unsigned char *nvlist;
|
||||||
|
|
||||||
|
if (vdev_label_read(vd, l, label,
|
||||||
|
offsetof(vdev_label_t, vl_vdev_phys),
|
||||||
|
sizeof (vdev_phys_t)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (label->vp_nvlist[0] != NV_ENCODE_XDR)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
nvlist = (const unsigned char *) label->vp_nvlist + 4;
|
||||||
|
error = nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG,
|
||||||
|
DATA_TYPE_UINT64, NULL, &label_txg);
|
||||||
|
if (error != 0 || label_txg == 0)
|
||||||
|
return (nvl);
|
||||||
|
|
||||||
|
if (label_txg <= txg && label_txg > best_txg) {
|
||||||
|
best_txg = label_txg;
|
||||||
|
memcpy(nvl, nvlist, nvl_size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use asize from pool config. We need this
|
||||||
|
* because we can get bad value from BIOS.
|
||||||
|
*/
|
||||||
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
|
||||||
|
DATA_TYPE_UINT64, NULL, &asize) == 0) {
|
||||||
|
vd->v_psize = asize +
|
||||||
|
VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (best_txg == 0) {
|
||||||
|
free(nvl);
|
||||||
|
nvl = NULL;
|
||||||
|
}
|
||||||
|
return (nvl);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vdev_uberblock_load(vdev_t *vd, uberblock_t *ub)
|
||||||
|
{
|
||||||
|
uberblock_t *buf;
|
||||||
|
|
||||||
|
buf = malloc(VDEV_UBERBLOCK_SIZE(vd));
|
||||||
|
if (buf == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||||
|
for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
|
||||||
|
if (vdev_label_read(vd, l, buf,
|
||||||
|
VDEV_UBERBLOCK_OFFSET(vd, n),
|
||||||
|
VDEV_UBERBLOCK_SIZE(vd)))
|
||||||
|
continue;
|
||||||
|
if (uberblock_verify(buf) != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (vdev_uberblock_compare(buf, ub) > 0)
|
||||||
|
*ub = *buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(buf);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
||||||
{
|
{
|
||||||
vdev_t vtmp;
|
vdev_t vtmp;
|
||||||
vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch;
|
|
||||||
vdev_phys_t *tmp_label;
|
|
||||||
spa_t *spa;
|
spa_t *spa;
|
||||||
vdev_t *vdev, *top_vdev, *pool_vdev;
|
vdev_t *vdev, *top_vdev, *pool_vdev;
|
||||||
off_t off;
|
unsigned char *nvlist;
|
||||||
blkptr_t bp;
|
|
||||||
const unsigned char *nvlist = NULL;
|
|
||||||
uint64_t val;
|
uint64_t val;
|
||||||
uint64_t guid;
|
uint64_t guid;
|
||||||
uint64_t best_txg = 0;
|
|
||||||
uint64_t pool_txg, pool_guid;
|
uint64_t pool_txg, pool_guid;
|
||||||
const char *pool_name;
|
const char *pool_name;
|
||||||
const unsigned char *vdevs;
|
const unsigned char *vdevs;
|
||||||
const unsigned char *features;
|
const unsigned char *features;
|
||||||
int i, l, rc, is_newer;
|
int rc, is_newer;
|
||||||
char *upbuf;
|
|
||||||
const struct uberblock *up;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Load the vdev label and figure out which
|
* Load the vdev label and figure out which
|
||||||
@ -1580,71 +1717,24 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv),
|
vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv),
|
||||||
(uint64_t)sizeof (vdev_label_t));
|
(uint64_t)sizeof (vdev_label_t));
|
||||||
|
|
||||||
/* Test for minimum pool size. */
|
/* Test for minimum device size. */
|
||||||
if (vtmp.v_psize < SPA_MINDEVSIZE)
|
if (vtmp.v_psize < SPA_MINDEVSIZE)
|
||||||
return (EIO);
|
return (EIO);
|
||||||
|
|
||||||
tmp_label = zfs_alloc(sizeof(vdev_phys_t));
|
nvlist = vdev_label_read_config(&vtmp, UINT64_MAX);
|
||||||
|
if (nvlist == NULL)
|
||||||
for (l = 0; l < VDEV_LABELS; l++) {
|
|
||||||
off = vdev_label_offset(vtmp.v_psize, l,
|
|
||||||
offsetof(vdev_label_t, vl_vdev_phys));
|
|
||||||
|
|
||||||
BP_ZERO(&bp);
|
|
||||||
BP_SET_LSIZE(&bp, sizeof(vdev_phys_t));
|
|
||||||
BP_SET_PSIZE(&bp, sizeof(vdev_phys_t));
|
|
||||||
BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
|
|
||||||
BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
|
|
||||||
DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
|
|
||||||
ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
|
|
||||||
|
|
||||||
if (vdev_read_phys(&vtmp, &bp, tmp_label, off, 0))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (tmp_label->vp_nvlist[0] != NV_ENCODE_XDR)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
nvlist = (const unsigned char *) tmp_label->vp_nvlist + 4;
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG,
|
|
||||||
DATA_TYPE_UINT64, NULL, &pool_txg) != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (best_txg <= pool_txg) {
|
|
||||||
uint64_t asize;
|
|
||||||
|
|
||||||
best_txg = pool_txg;
|
|
||||||
memcpy(vdev_label, tmp_label, sizeof (vdev_phys_t));
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use asize from pool config. We need this
|
|
||||||
* because we can get bad value from BIOS.
|
|
||||||
*/
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
|
|
||||||
DATA_TYPE_UINT64, NULL, &asize) == 0) {
|
|
||||||
vtmp.v_psize = asize +
|
|
||||||
VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
zfs_free(tmp_label, sizeof (vdev_phys_t));
|
|
||||||
|
|
||||||
if (best_txg == 0)
|
|
||||||
return (EIO);
|
return (EIO);
|
||||||
|
|
||||||
if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR)
|
|
||||||
return (EIO);
|
|
||||||
|
|
||||||
nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4;
|
|
||||||
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
|
||||||
NULL, &val) != 0) {
|
NULL, &val) != 0) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!SPA_VERSION_IS_SUPPORTED(val)) {
|
if (!SPA_VERSION_IS_SUPPORTED(val)) {
|
||||||
printf("ZFS: unsupported ZFS version %u (should be %u)\n",
|
printf("ZFS: unsupported ZFS version %u (should be %u)\n",
|
||||||
(unsigned) val, (unsigned) SPA_VERSION);
|
(unsigned) val, (unsigned) SPA_VERSION);
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1652,16 +1742,19 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
|
||||||
DATA_TYPE_NVLIST, NULL, &features) == 0 &&
|
DATA_TYPE_NVLIST, NULL, &features) == 0 &&
|
||||||
nvlist_check_features_for_read(features) != 0) {
|
nvlist_check_features_for_read(features) != 0) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
|
||||||
NULL, &val) != 0) {
|
NULL, &val) != 0) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (val == POOL_STATE_DESTROYED) {
|
if (val == POOL_STATE_DESTROYED) {
|
||||||
/* We don't boot only from destroyed pools. */
|
/* We don't boot only from destroyed pools. */
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1675,12 +1768,13 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
* Cache and spare devices end up here - just ignore
|
* Cache and spare devices end up here - just ignore
|
||||||
* them.
|
* them.
|
||||||
*/
|
*/
|
||||||
/*printf("ZFS: can't find pool details\n");*/
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64,
|
||||||
NULL, &val) == 0 && val != 0) {
|
NULL, &val) == 0 && val != 0) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1690,8 +1784,10 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
spa = spa_find_by_guid(pool_guid);
|
spa = spa_find_by_guid(pool_guid);
|
||||||
if (spa == NULL) {
|
if (spa == NULL) {
|
||||||
spa = spa_create(pool_guid, pool_name);
|
spa = spa_create(pool_guid, pool_name);
|
||||||
if (spa == NULL)
|
if (spa == NULL) {
|
||||||
|
free(nvlist);
|
||||||
return (ENOMEM);
|
return (ENOMEM);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (pool_txg > spa->spa_txg) {
|
if (pool_txg > spa->spa_txg) {
|
||||||
spa->spa_txg = pool_txg;
|
spa->spa_txg = pool_txg;
|
||||||
@ -1708,18 +1804,24 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
*/
|
*/
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
|
||||||
NULL, &guid) != 0) {
|
NULL, &guid) != 0) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
vdev = vdev_find(guid);
|
vdev = vdev_find(guid);
|
||||||
if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */
|
/* Has this vdev already been inited? */
|
||||||
|
if (vdev && vdev->v_phys_read) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
|
}
|
||||||
|
|
||||||
if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
|
if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST,
|
||||||
NULL, &vdevs)) {
|
NULL, &vdevs)) {
|
||||||
|
free(nvlist);
|
||||||
return (EIO);
|
return (EIO);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer);
|
rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer);
|
||||||
|
free(nvlist);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
return (rc);
|
return (rc);
|
||||||
|
|
||||||
@ -1729,6 +1831,7 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink)
|
STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink)
|
||||||
if (top_vdev == pool_vdev)
|
if (top_vdev == pool_vdev)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (!pool_vdev && top_vdev) {
|
if (!pool_vdev && top_vdev) {
|
||||||
top_vdev->spa = spa;
|
top_vdev->spa = spa;
|
||||||
STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink);
|
STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink);
|
||||||
@ -1765,36 +1868,7 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
|
|||||||
* the best uberblock and then we can actually access
|
* the best uberblock and then we can actually access
|
||||||
* the contents of the pool.
|
* the contents of the pool.
|
||||||
*/
|
*/
|
||||||
upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev));
|
vdev_uberblock_load(vdev, &spa->spa_uberblock);
|
||||||
up = (const struct uberblock *)upbuf;
|
|
||||||
for (l = 0; l < VDEV_LABELS; l++) {
|
|
||||||
for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) {
|
|
||||||
off = vdev_label_offset(vdev->v_psize, l,
|
|
||||||
VDEV_UBERBLOCK_OFFSET(vdev, i));
|
|
||||||
BP_ZERO(&bp);
|
|
||||||
DVA_SET_OFFSET(&bp.blk_dva[0], off);
|
|
||||||
BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
|
|
||||||
BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
|
|
||||||
BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
|
|
||||||
BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
|
|
||||||
ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
|
|
||||||
|
|
||||||
if (vdev_read_phys(vdev, &bp, upbuf, off, 0))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (up->ub_magic != UBERBLOCK_MAGIC)
|
|
||||||
continue;
|
|
||||||
if (up->ub_txg < spa->spa_txg)
|
|
||||||
continue;
|
|
||||||
if (up->ub_txg > spa->spa_uberblock.ub_txg ||
|
|
||||||
(up->ub_txg == spa->spa_uberblock.ub_txg &&
|
|
||||||
up->ub_timestamp >
|
|
||||||
spa->spa_uberblock.ub_timestamp)) {
|
|
||||||
spa->spa_uberblock = *up;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev));
|
|
||||||
|
|
||||||
vdev->spa = spa;
|
vdev->spa = spa;
|
||||||
if (spap != NULL)
|
if (spap != NULL)
|
||||||
|
@ -63,6 +63,14 @@
|
|||||||
|
|
||||||
#define _NOTE(s)
|
#define _NOTE(s)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AVL comparator helpers
|
||||||
|
*/
|
||||||
|
#define AVL_ISIGN(a) (((a) > 0) - ((a) < 0))
|
||||||
|
#define AVL_CMP(a, b) (((a) > (b)) - ((a) < (b)))
|
||||||
|
#define AVL_PCMP(a, b) \
|
||||||
|
(((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b)))
|
||||||
|
|
||||||
typedef enum { B_FALSE, B_TRUE } boolean_t;
|
typedef enum { B_FALSE, B_TRUE } boolean_t;
|
||||||
|
|
||||||
/* CRC64 table */
|
/* CRC64 table */
|
||||||
@ -490,8 +498,16 @@ typedef struct zio_gbh {
|
|||||||
#define VDEV_PHYS_SIZE (112 << 10)
|
#define VDEV_PHYS_SIZE (112 << 10)
|
||||||
#define VDEV_UBERBLOCK_RING (128 << 10)
|
#define VDEV_UBERBLOCK_RING (128 << 10)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock
|
||||||
|
* ring when MMP is enabled.
|
||||||
|
*/
|
||||||
|
#define MMP_BLOCKS_PER_LABEL 1
|
||||||
|
|
||||||
|
/* The largest uberblock we support is 8k. */
|
||||||
|
#define MAX_UBERBLOCK_SHIFT (13)
|
||||||
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
||||||
MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT)
|
MIN(MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT), MAX_UBERBLOCK_SHIFT)
|
||||||
#define VDEV_UBERBLOCK_COUNT(vd) \
|
#define VDEV_UBERBLOCK_COUNT(vd) \
|
||||||
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
|
(VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd))
|
||||||
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
|
#define VDEV_UBERBLOCK_OFFSET(vd, n) \
|
||||||
@ -841,14 +857,88 @@ typedef enum pool_state {
|
|||||||
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
||||||
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
||||||
|
|
||||||
struct uberblock {
|
#define	MMP_MAGIC		0xa11cea11	/* all-see-all */

/* Valid-field bits kept in the low byte of ub_mmp_config. */
#define	MMP_INTERVAL_VALID_BIT	0x01
#define	MMP_SEQ_VALID_BIT	0x02
#define	MMP_FAIL_INT_VALID_BIT	0x04

/*
 * Predicates on an uberblock pointer.  MMP_VALID() requires both the
 * uberblock magic and the MMP magic; the remaining predicates additionally
 * require the matching valid bit in ub_mmp_config.
 *
 * The argument is parenthesized at every use so that any pointer
 * expression (e.g. `&ubs[i]' or `p + 1') can be passed.  It is evaluated
 * more than once, so it must not have side effects.
 */
#define	MMP_VALID(ubp)		((ubp)->ub_magic == UBERBLOCK_MAGIC && \
				    (ubp)->ub_mmp_magic == MMP_MAGIC)
#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_INTERVAL_VALID_BIT))
#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_SEQ_VALID_BIT))
#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && ((ubp)->ub_mmp_config & \
				    MMP_FAIL_INT_VALID_BIT))

/*
 * Field extractors for ub_mmp_config: bits 8-31 hold the write interval,
 * bits 32-47 the sequence number and bits 48-63 the fail intervals.
 */
#define	MMP_INTERVAL(ubp)	(((ubp)->ub_mmp_config & 0x00000000FFFFFF00) \
				    >> 8)
#define	MMP_SEQ(ubp)		(((ubp)->ub_mmp_config & 0x0000FFFF00000000) \
				    >> 32)
#define	MMP_FAIL_INT(ubp)	(((ubp)->ub_mmp_config & 0xFFFF000000000000) \
				    >> 48)
|
||||||
|
|
||||||
|
typedef struct uberblock {
|
||||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||||
uint64_t ub_version; /* SPA_VERSION */
|
uint64_t ub_version; /* SPA_VERSION */
|
||||||
uint64_t ub_txg; /* txg of last sync */
|
uint64_t ub_txg; /* txg of last sync */
|
||||||
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
uint64_t ub_guid_sum; /* sum of all vdev guids */
|
||||||
uint64_t ub_timestamp; /* UTC time of last sync */
|
uint64_t ub_timestamp; /* UTC time of last sync */
|
||||||
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
blkptr_t ub_rootbp; /* MOS objset_phys_t */
|
||||||
};
|
/* highest SPA_VERSION supported by software that wrote this txg */
|
||||||
|
uint64_t ub_software_version;
|
||||||
|
/* Maybe missing in uberblocks we read, but always written */
|
||||||
|
uint64_t ub_mmp_magic;
|
||||||
|
/*
|
||||||
|
* If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
|
||||||
|
* Otherwise, nanosec since last MMP write.
|
||||||
|
*/
|
||||||
|
uint64_t ub_mmp_delay;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The ub_mmp_config contains the multihost write interval, multihost
|
||||||
|
* fail intervals, sequence number for sub-second granularity, and
|
||||||
|
* valid bit mask. This layout is as follows:
|
||||||
|
*
|
||||||
|
* 64 56 48 40 32 24 16 8 0
|
||||||
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||||
|
* 0 | Fail Intervals| Seq | Write Interval (ms) | VALID |
|
||||||
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||||
|
*
|
||||||
|
* This allows a write_interval of (2^24/1000)s, over 4.5 hours
|
||||||
|
*
|
||||||
|
* VALID Bits:
|
||||||
|
* - 0x01 - Write Interval (ms)
|
||||||
|
* - 0x02 - Sequence number exists
|
||||||
|
* - 0x04 - Fail Intervals
|
||||||
|
* - 0xf8 - Reserved
|
||||||
|
*/
|
||||||
|
uint64_t ub_mmp_config;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ub_checkpoint_txg indicates two things about the current uberblock:
|
||||||
|
*
|
||||||
|
* 1] If it is not zero then this uberblock is a checkpoint. If it is
|
||||||
|
* zero, then this uberblock is not a checkpoint.
|
||||||
|
*
|
||||||
|
* 2] On checkpointed uberblocks, the value of ub_checkpoint_txg is
|
||||||
|
* the ub_txg that the uberblock had at the time we moved it to
|
||||||
|
* the MOS config.
|
||||||
|
*
|
||||||
|
* The field is set when we checkpoint the uberblock and continues to
|
||||||
|
* hold that value even after we've rewound (unlike the ub_txg that
|
||||||
|
* is reset to a higher value).
|
||||||
|
*
|
||||||
|
* Besides checks used to determine whether we are reopening the
|
||||||
|
* pool from a checkpointed uberblock [see spa_ld_select_uberblock()],
|
||||||
|
* the value of the field is used to determine which ZIL blocks have
|
||||||
|
* been allocated according to the ms_sm when we are rewinding to a
|
||||||
|
* checkpoint. Specifically, if blk_birth > ub_checkpoint_txg, then
|
||||||
|
* the ZIL block is not allocated [see uses of spa_min_claim_txg()].
|
||||||
|
*/
|
||||||
|
uint64_t ub_checkpoint_txg;
|
||||||
|
} uberblock_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Flags.
|
* Flags.
|
||||||
|
Loading…
Reference in New Issue
Block a user