diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c index 06c2e253960d..b7b08251e45b 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c @@ -212,7 +212,8 @@ get_usage(zpool_help_t idx) case HELP_CLEAR: return (gettext("\tclear [-nF] [device]\n")); case HELP_CREATE: - return (gettext("\tcreate [-fnd] [-o property=value] ... \n" + return (gettext("\tcreate [-fnd] [-B] " + "[-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" "\t [-m mountpoint] [-R root] ...\n")); case HELP_DESTROY: @@ -499,6 +500,8 @@ zpool_do_add(int argc, char **argv) int c; nvlist_t *nvroot; char *poolname; + zpool_boot_label_t boot_type; + uint64_t boot_size; int ret; zpool_handle_t *zhp; nvlist_t *config; @@ -547,9 +550,15 @@ zpool_do_add(int argc, char **argv) return (1); } + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + /* pass off to get_vdev_spec for processing */ + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun, - argc, argv); + boot_type, boot_size, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); @@ -774,10 +783,11 @@ zpool_do_labelclear(int argc, char **argv) } /* - * zpool create [-fnd] [-o property=value] ... + * zpool create [-fnd] [-B] [-o property=value] ... * [-O file-system-property=value] ... * [-R root] [-m mountpoint] ... * + * -B Create boot partition. * -f Force creation, even if devices appear in use * -n Do not create the pool, but display the resulting layout if it * were to be created. @@ -794,12 +804,16 @@ zpool_do_labelclear(int argc, char **argv) * we get the nvlist back from get_vdev_spec(), we either print out the contents * (if '-n' was specified), or pass it to libzfs to do the creation. */ + +#define SYSTEM256 (256 * 1024 * 1024) int zpool_do_create(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t enable_all_pool_feat = B_TRUE; + zpool_boot_label_t boot_type = ZPOOL_NO_BOOT_LABEL; + uint64_t boot_size = 0; int c; nvlist_t *nvroot = NULL; char *poolname; @@ -811,7 +825,7 @@ zpool_do_create(int argc, char **argv) char *propval; /* check options */ - while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) { + while ((c = getopt(argc, argv, ":fndBR:m:o:O:")) != -1) { switch (c) { case 'f': force = B_TRUE; @@ -822,6 +836,22 @@ zpool_do_create(int argc, char **argv) case 'd': enable_all_pool_feat = B_FALSE; break; + case 'B': +#ifdef illumos + /* + * We should create the system partition. + * Also make sure the size is set. + */ + boot_type = ZPOOL_CREATE_BOOT_LABEL; + if (boot_size == 0) + boot_size = SYSTEM256; + break; +#else + (void) fprintf(stderr, + gettext("option '%c' is not supported\n"), + optopt); + goto badusage; +#endif case 'R': altroot = optarg; if (add_prop_list(zpool_prop_to_name( @@ -851,6 +881,20 @@ zpool_do_create(int argc, char **argv) if (add_prop_list(optarg, propval, &props, B_TRUE)) goto errout; + /* + * Get bootsize value for make_root_vdev(). + */ + if (zpool_name_to_prop(optarg) == ZPOOL_PROP_BOOTSIZE) { + if (zfs_nicestrtonum(g_zfs, propval, + &boot_size) < 0 || boot_size == 0) { + (void) fprintf(stderr, + gettext("bad boot partition size " + "'%s': %s\n"), propval, + libzfs_error_description(g_zfs)); + goto errout; + } + } + /* * If the user is creating a pool that doesn't support * feature flags, don't enable any features. @@ -928,9 +972,43 @@ zpool_do_create(int argc, char **argv) goto errout; } + /* + * Make sure the bootsize is set when ZPOOL_CREATE_BOOT_LABEL is used, + * and not set otherwise. + */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + const char *propname; + char *strptr, *buf = NULL; + int rv; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) != 0) { + (void) asprintf(&buf, "%" PRIu64, boot_size); + if (buf == NULL) { + (void) fprintf(stderr, + gettext("internal error: out of memory\n")); + goto errout; + } + rv = add_prop_list(propname, buf, &props, B_TRUE); + free(buf); + if (rv != 0) + goto errout; + } + } else { + const char *propname; + char *strptr; + + propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE); + if (nvlist_lookup_string(props, propname, &strptr) == 0) { + (void) fprintf(stderr, gettext("error: setting boot " + "partition size requires option '-B'\n")); + goto errout; + } + } + /* pass off to get_vdev_spec for bulk processing */ nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun, - argc - 1, argv + 1); + boot_type, boot_size, argc - 1, argv + 1); if (nvroot == NULL) goto errout; @@ -3209,6 +3287,8 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) nvlist_t *nvroot; char *poolname, *old_disk, *new_disk; zpool_handle_t *zhp; + zpool_boot_label_t boot_type; + uint64_t boot_size; int ret; /* check options */ @@ -3273,8 +3353,14 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing) return (1); } + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL); nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE, - argc, argv); + boot_type, boot_size, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h index 134c730fcf8e..8777edc9de17 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h @@ -44,7 +44,8 @@ uint_t num_logs(nvlist_t *nv); */ nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, int argc, char **argv); + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv); nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, splitflags_t flags, int argc, char **argv); diff --git a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c index d860afcb6cc2..f72e3f7fc97c 100644 --- a/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c +++ b/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c @@ -935,14 +935,15 @@ check_replication(nvlist_t *config, nvlist_t *newroot) * Go through and find any whole disks in the vdev specification, labelling them * as appropriate. When constructing the vdev spec, we were unable to open this * device in order to provide a devid. Now that we have labelled the disk and - * know that slice 0 is valid, we can construct the devid now. + * know the pool slice is valid, we can construct the devid now. * * If the disk was already labeled with an EFI label, we will have gotten the * devid already (because we were able to open the whole disk). Otherwise, we * need to get the devid after we label the disk. */ static int -make_disks(zpool_handle_t *zhp, nvlist_t *nv) +make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type, + uint64_t boot_size) { nvlist_t **child; uint_t c, children; @@ -951,6 +952,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) uint64_t wholedisk; int fd; int ret; + int slice; ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; @@ -968,20 +970,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) * slice and stat()ing the device. */ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, - &wholedisk) != 0 || !wholedisk) - return (0); diskname = strrchr(path, '/'); assert(diskname != NULL); diskname++; - if (zpool_label_disk(g_zfs, zhp, diskname) == -1) - return (-1); + + if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, + &wholedisk) != 0 || !wholedisk) { + /* + * This is not whole disk, return error if + * boot partition creation was requested + */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + (void) fprintf(stderr, + gettext("creating boot partition is only " + "supported on whole disk vdevs: %s\n"), + diskname); + return (-1); + } + return (0); + } + + ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type, + boot_size, &slice); + if (ret == -1) + return (ret); /* * Fill in the devid, now that we've labeled the disk. */ - (void) snprintf(buf, sizeof (buf), "%ss0", path); + (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice); if ((fd = open(buf, O_RDONLY)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), @@ -1004,7 +1022,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) } /* - * Update the path to refer to the 's0' slice. The presence of + * Update the path to refer to the pool slice. The presence of * the 'whole_disk' field indicates to the CLI that we should * chop off the slice number when displaying the device in * future output. @@ -1016,21 +1034,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (0); } - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + /* illumos kernel does not support booting from multi-vdev pools. */ + if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) { + if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) { + (void) fprintf(stderr, gettext("boot pool " + "can not have more than one vdev\n")); + return (-1); + } + } + + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) - for (c = 0; c < children; c++) - if ((ret = make_disks(zhp, child[c])) != 0) + for (c = 0; c < children; c++) { + ret = make_disks(zhp, child[c], boot_type, boot_size); + if (ret != 0) return (ret); + } return (0); } @@ -1429,6 +1462,9 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, { nvlist_t *newroot = NULL, **child; uint_t c, children; +#ifdef illumos + zpool_boot_label_t boot_type; +#endif if (argc > 0) { if ((newroot = construct_spec(argc, argv)) == NULL) { @@ -1438,7 +1474,13 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, } #ifdef illumos - if (!flags.dryrun && make_disks(zhp, newroot) != 0) { + if (zpool_is_bootable(zhp)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + + if (!flags.dryrun && + make_disks(zhp, newroot, boot_type, 0) != 0) { nvlist_free(newroot); return (NULL); } @@ -1483,7 +1525,8 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props, */ nvlist_t * make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, - boolean_t replacing, boolean_t dryrun, int argc, char **argv) + boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type, + uint64_t boot_size, int argc, char **argv) { nvlist_t *newroot; nvlist_t *poolconfig = NULL; @@ -1525,7 +1568,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep, /* * Run through the vdev specification and label any whole disks found. */ - if (!dryrun && make_disks(zhp, newroot) != 0) { + if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) { nvlist_free(newroot); return (NULL); } diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h index 9133238cb848..3e15dd1c814d 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h @@ -134,6 +134,18 @@ typedef enum zfs_error { EZFS_UNKNOWN } zfs_error_t; +/* + * UEFI boot support parameters. When creating whole disk boot pool, + * zpool create should allow to create EFI System partition for UEFI boot + * program. In case of BIOS, the EFI System partition is not used + * even if it does exist. + */ +typedef enum zpool_boot_label { + ZPOOL_NO_BOOT_LABEL = 0, + ZPOOL_CREATE_BOOT_LABEL, + ZPOOL_COPY_BOOT_LABEL +} zpool_boot_label_t; + /* * The following data structures are all part * of the zfs_allow_t data structure which is @@ -266,7 +278,8 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); -extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *); +extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *, + zpool_boot_label_t, uint64_t, int *); /* * Functions to manage pool properties @@ -349,6 +362,7 @@ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); +extern boolean_t zpool_is_bootable(zpool_handle_t *); /* * Import and export functions diff --git a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c index 962b3ff2b61b..612f37ebd4b8 100644 --- a/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c +++ b/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c @@ -49,7 +49,7 @@ #include "zfs_comutil.h" #include "zfeature_common.h" -static int read_efi_label(nvlist_t *config, diskaddr_t *sb); +static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *); static boolean_t zpool_vdev_is_interior(const char *name); #define BACKUP_SLICE "s2" @@ -316,6 +316,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, (void) zfs_nicenum(intval, buf, len); } break; + case ZPOOL_PROP_BOOTSIZE: case ZPOOL_PROP_EXPANDSZ: if (intval == 0) { (void) strlcpy(buf, "-", len); @@ -517,6 +518,16 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, } break; + case ZPOOL_PROP_BOOTSIZE: + if (!flags.create) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "property '%s' can only be set during pool " + "creation"), propname); + (void) zfs_error(hdl, EZFS_BADPROP, errbuf); + goto error; + } + break; + case ZPOOL_PROP_BOOTFS: if (flags.create || flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, @@ -1990,8 +2001,9 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * Search for the requested value. Special cases: * - * - ZPOOL_CONFIG_PATH for whole disk entries. These end in - * "s0" or "s0/old". The "s0" part is hidden from the user, + * - ZPOOL_CONFIG_PATH for whole disk entries. To support + * UEFI boot, these end in "s0" or "s0/old" or "s1" or + * "s1/old". The "s0" or "s1" part is hidden from the user, * but included in the string, so this matches around it. * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). * @@ -2022,14 +2034,16 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, /* * strings identical except trailing "s0" */ - if (strcmp(&val[vlen - 2], "s0") == 0 && + if ((strcmp(&val[vlen - 2], "s0") == 0 || + strcmp(&val[vlen - 2], "s1") == 0) && strncmp(srchval, val, slen) == 0) return (nv); /* * strings identical except trailing "s0/old" */ - if (strcmp(&val[vlen - 6], "s0/old") == 0 && + if ((strcmp(&val[vlen - 6], "s0/old") == 0 || + strcmp(&val[vlen - 6], "s1/old") == 0) && strcmp(&srchval[slen - 4], "/old") == 0 && strncmp(srchval, val, slen - 4) == 0) return (nv); @@ -3460,15 +3474,17 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, char *tmp = zfs_strdup(hdl, path); /* - * If it starts with c#, and ends with "s0", chop - * the "s0" off, or if it ends with "s0/old", remove - * the "s0" from the middle. + * If it starts with c#, and ends with "s0" or "s1", + * chop the slice off, or if it ends with "s0/old" or + * "s1/old", remove the slice from the middle. */ if (CTD_CHECK(tmp)) { - if (strcmp(&tmp[pathlen - 2], "s0") == 0) { + if (strcmp(&tmp[pathlen - 2], "s0") == 0 || + strcmp(&tmp[pathlen - 2], "s1") == 0) { tmp[pathlen - 2] = '\0'; } else if (pathlen > 6 && - strcmp(&tmp[pathlen - 6], "s0/old") == 0) { + (strcmp(&tmp[pathlen - 6], "s0/old") == 0 || + strcmp(&tmp[pathlen - 6], "s1/old") == 0)) { (void) strcpy(&tmp[pathlen - 6], "/old"); } @@ -3873,15 +3889,18 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, * Read the EFI label from the config, if a label does not exist then * pass back the error to the caller. If the caller has passed a non-NULL * diskaddr argument then we set it to the starting address of the EFI - * partition. + * partition. If the caller has passed a non-NULL boolean argument, then + * we set it to indicate if the disk does have efi system partition. */ static int -read_efi_label(nvlist_t *config, diskaddr_t *sb) +read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system) { char *path; int fd; char diskname[MAXPATHLEN]; + boolean_t boot = B_FALSE; int err = -1; + int slice; if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) return (err); @@ -3892,8 +3911,16 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb) struct dk_gpt *vtoc; if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { - if (sb != NULL) - *sb = vtoc->efi_parts[0].p_start; + for (slice = 0; slice < vtoc->efi_nparts; slice++) { + if (vtoc->efi_parts[slice].p_tag == V_SYSTEM) + boot = B_TRUE; + if (vtoc->efi_parts[slice].p_tag == V_USR) + break; + } + if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR) + *sb = vtoc->efi_parts[slice].p_start; + if (system != NULL) + *system = boot; efi_free(vtoc); } (void) close(fd); @@ -3920,7 +3947,7 @@ find_start_block(nvlist_t *config) &wholedisk) != 0 || !wholedisk) { return (MAXOFFSET_T); } - if (read_efi_label(config, &sb) < 0) + if (read_efi_label(config, &sb, NULL) < 0) sb = MAXOFFSET_T; return (sb); } @@ -3940,7 +3967,8 @@ find_start_block(nvlist_t *config) * stripped of any leading /dev path. */ int -zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) +zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name, + zpool_boot_label_t boot_type, uint64_t boot_size, int *slice) { #ifdef illumos char path[MAXPATHLEN]; @@ -3999,15 +4027,6 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } - slice_size = vtoc->efi_last_u_lba + 1; - slice_size -= EFI_MIN_RESV_SIZE; - if (start_block == MAXOFFSET_T) - start_block = NEW_START_BLOCK; - slice_size -= start_block; - - vtoc->efi_parts[0].p_start = start_block; - vtoc->efi_parts[0].p_size = slice_size; - /* * Why we use V_USR: V_BACKUP confuses users, and is considered * disposable by some EFI utilities (since EFI doesn't have a backup @@ -4016,12 +4035,103 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name) * etc. were all pretty specific. V_USR is as close to reality as we * can get, in the absence of V_OTHER. */ - vtoc->efi_parts[0].p_tag = V_USR; - (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + /* first fix the partition start block */ + if (start_block == MAXOFFSET_T) + start_block = NEW_START_BLOCK; - vtoc->efi_parts[8].p_start = slice_size + start_block; - vtoc->efi_parts[8].p_size = resv; - vtoc->efi_parts[8].p_tag = V_RESERVED; + /* + * EFI System partition is using slice 0. + * ZFS is on slice 1 and slice 8 is reserved. + * We assume the GPT partition table without system + * partition has zfs p_start == NEW_START_BLOCK. + * If start_block != NEW_START_BLOCK, it means we have + * system partition. Correct solution would be to query/cache vtoc + * from existing vdev member. + */ + if (boot_type == ZPOOL_CREATE_BOOT_LABEL) { + if (boot_size % vtoc->efi_lbasize != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "boot partition size must be a multiple of %d"), + vtoc->efi_lbasize); + (void) close(fd); + efi_free(vtoc); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + /* + * System partition size checks. + * Note the 1MB is quite arbitrary value, since we + * are creating dedicated pool, it should be enough + * to hold fat + efi bootloader. May need to be + * adjusted if the bootloader size will grow. + */ + if (boot_size < 1024 * 1024) { + char buf[64]; + zfs_nicenum(boot_size, buf, sizeof (buf)); + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Specified size %s for EFI System partition is too " + "small, the minimum size is 1MB."), buf); + (void) close(fd); + efi_free(vtoc); + return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); + } + /* 33MB is tested with mkfs -F pcfs */ + if (hdl->libzfs_printerr && + ((vtoc->efi_lbasize == 512 && + boot_size < 33 * 1024 * 1024) || + (vtoc->efi_lbasize == 4096 && + boot_size < 256 * 1024 * 1024))) { + char buf[64]; + zfs_nicenum(boot_size, buf, sizeof (buf)); + (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "Warning: EFI System partition size %s is " + "not allowing to create FAT32 file\nsystem, which " + "may result in unbootable system.\n"), buf); + } + /* Adjust zfs partition start by size of system partition. */ + start_block += boot_size / vtoc->efi_lbasize; + } + + if (start_block == NEW_START_BLOCK) { + /* + * Use default layout. + * ZFS is on slice 0 and slice 8 is reserved. + */ + slice_size = vtoc->efi_last_u_lba + 1; + slice_size -= EFI_MIN_RESV_SIZE; + slice_size -= start_block; + if (slice != NULL) + *slice = 0; + + vtoc->efi_parts[0].p_start = start_block; + vtoc->efi_parts[0].p_size = slice_size; + + vtoc->efi_parts[0].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + } else { + slice_size = start_block - NEW_START_BLOCK; + vtoc->efi_parts[0].p_start = NEW_START_BLOCK; + vtoc->efi_parts[0].p_size = slice_size; + vtoc->efi_parts[0].p_tag = V_SYSTEM; + (void) strcpy(vtoc->efi_parts[0].p_name, "loader"); + if (slice != NULL) + *slice = 1; + /* prepare slice 1 */ + slice_size = vtoc->efi_last_u_lba + 1 - slice_size; + slice_size -= resv; + slice_size -= NEW_START_BLOCK; + vtoc->efi_parts[1].p_start = start_block; + vtoc->efi_parts[1].p_size = slice_size; + vtoc->efi_parts[1].p_tag = V_USR; + (void) strcpy(vtoc->efi_parts[1].p_name, "zfs"); + + vtoc->efi_parts[8].p_start = slice_size + start_block; + vtoc->efi_parts[8].p_size = resv; + vtoc->efi_parts[8].p_tag = V_RESERVED; + } if (efi_write(fd, vtoc) != 0) { /* diff --git a/cddl/usr.sbin/zfsd/case_file.cc b/cddl/usr.sbin/zfsd/case_file.cc index c14009bdf678..19c4abe45fc9 100644 --- a/cddl/usr.sbin/zfsd/case_file.cc +++ b/cddl/usr.sbin/zfsd/case_file.cc @@ -239,6 +239,8 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) { ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); + zpool_boot_label_t boot_type; + uint64_t boot_size; if (pool == NULL || !RefreshVdevState()) { /* @@ -331,7 +333,13 @@ CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) } /* Write a label on the newly inserted disk. */ - if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { + if (zpool_is_bootable(pool)) + boot_type = ZPOOL_COPY_BOOT_LABEL; + else + boot_type = ZPOOL_NO_BOOT_LABEL; + boot_size = zpool_get_prop_int(pool, ZPOOL_PROP_BOOTSIZE, NULL); + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str(), + boot_type, boot_size, NULL) != 0) { syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path (label): %s: %s\n", zpool_get_name(pool), VdevGUIDString().c_str(), diff --git a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c b/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c index 9c717442ed7a..2a4f55d5b996 100644 --- a/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c +++ b/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c @@ -100,6 +100,10 @@ zpool_prop_init(void) PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>", "DEDUP"); + /* system partition size */ + zprop_register_number(ZPOOL_PROP_BOOTSIZE, "bootsize", 0, PROP_ONETIME, + ZFS_TYPE_POOL, "", "BOOTSIZE"); + /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, PROP_DEFAULT, ZFS_TYPE_POOL, "", "VERSION"); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index 6159b259247b..c10ca655072c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -481,6 +481,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc) spa_config_enter(mc->mc_spa, SCL_VDEV, FTAG, RW_READER); for (int c = 0; c < rvd->vdev_children; c++) { + uint64_t tspace; vdev_t *tvd = rvd->vdev_child[c]; metaslab_group_t *mg = tvd->vdev_mg; @@ -493,9 +494,13 @@ metaslab_class_expandable_space(metaslab_class_t *mc) * Calculate if we have enough space to add additional * metaslabs. We report the expandable space in terms * of the metaslab size since that's the unit of expansion. + * Adjust by efi system partition size. */ - space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + tspace = tvd->vdev_max_asize - tvd->vdev_asize; + if (tspace > mc->mc_spa->spa_bootsize) { + tspace -= mc->mc_spa->spa_bootsize; + } + space += P2ALIGN(tspace, 1ULL << tvd->vdev_ms_shift); } spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG); return (space); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c index effd390d57c1..7ecf24f78573 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -2813,6 +2813,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config, spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation); spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode); spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand); + spa_prop_find(spa, ZPOOL_PROP_BOOTSIZE, &spa->spa_bootsize); spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO, &spa->spa_dedup_ditto); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h index 78cdaecf496f..3baf2e35e95e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h @@ -242,6 +242,7 @@ struct spa { int spa_mode; /* FREAD | FWRITE */ spa_log_state_t spa_log_state; /* log state */ uint64_t spa_autoexpand; /* lun expansion on/off */ + uint64_t spa_bootsize; /* efi system partition size */ ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */ uint64_t spa_ddt_stat_object; /* DDT statistics */ uint64_t spa_dedup_ditto; /* dedup ditto threshold */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index dab1c8245ce5..f4cedd724c4b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -2923,8 +2923,8 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs) * since that determines how much space the pool can expand. */ if (vd->vdev_aux == NULL && tvd != NULL && vd->vdev_max_asize != 0) { - vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize, - 1ULL << tvd->vdev_ms_shift); + vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize - + spa->spa_bootsize, 1ULL << tvd->vdev_ms_shift); } vs->vs_configured_ashift = vd->vdev_top != NULL ? vd->vdev_top->vdev_ashift : vd->vdev_ashift; diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h index 0a0b7713abc5..967ab6751c5f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h +++ b/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h @@ -211,6 +211,7 @@ typedef enum { ZPOOL_PROP_FRAGMENTATION, ZPOOL_PROP_LEAKED, ZPOOL_PROP_MAXBLOCKSIZE, + ZPOOL_PROP_BOOTSIZE, ZPOOL_NUM_PROPS } zpool_prop_t;