From a9be09f45f9fb89863944ed758705caf3ccc4b65 Mon Sep 17 00:00:00 2001 From: tsoome Date: Mon, 21 Sep 2020 09:01:10 +0000 Subject: [PATCH] loader: zfs should support bootonce and nextboot The bootonce feature is a temporary, one-time boot, activated by "bectl activate -t BE"; "bectl activate -T BE" will reset the bootonce flag. By default, the bootonce setting is reset on attempt to boot and the next boot will use the previously active BE. By setting zfs_bootonce_activate="YES" in rc.conf, the bootonce BE will be set permanently active. The bootonce dataset name is recorded in boot pool labels, bootenv area. In case of nextboot, the nextboot_enable boolean variable is recorded in freebsd:nvstore nvlist, also stored in boot pool label bootenv area. On boot, the loader will process /boot/nextboot.conf if nextboot_enable is "YES", and will set nextboot_enable to "NO", preventing /boot/nextboot.conf processing on next boot. The bootonce and nextboot features are usable in both UEFI and BIOS boot. To use bootonce/nextboot features, the boot loader needs to be updated on disk; if loader.efi is stored on ESP, then ESP needs to be updated and for BIOS boot, stage2 (zfsboot or gptzfsboot) needs to be updated (gpart or other tools). At this time, only lua loader is updated. Sponsored by: Netflix, Klara Inc. 
Differential Revision: https://reviews.freebsd.org/D25512 --- Makefile.inc1 | 6 +- cddl/lib/Makefile | 3 + cddl/lib/libzfsbootenv/Makefile | 33 + cddl/lib/libzpool/Makefile | 1 + lib/libbe/Makefile | 4 +- lib/libbe/be.c | 69 +- lib/libbe/be.h | 1 + lib/libbe/be_impl.h | 1 + lib/libbe/be_info.c | 11 + lib/libbe/libbe.3 | 26 +- libexec/rc/rc.conf | 1 + libexec/rc/rc.d/zfsbe | 18 + rescue/rescue/Makefile | 2 +- sbin/bectl/Makefile | 1 + sbin/bectl/bectl.8 | 19 +- sbin/bectl/bectl.c | 22 +- sbin/bectl/bectl_list.c | 7 +- sbin/reboot/nextboot.8 | 7 +- sbin/reboot/nextboot.sh | 21 +- sbin/zfsbootcfg/Makefile | 7 +- sbin/zfsbootcfg/zfsbootcfg.8 | 103 +- sbin/zfsbootcfg/zfsbootcfg.c | 310 +++-- share/man/man5/rc.conf.5 | 8 +- share/mk/bsd.libnames.mk | 1 + share/mk/src.libnames.mk | 5 +- stand/common/bootstrap.h | 31 + stand/common/nvstore.c | 310 +++++ stand/defaults/loader.conf | 1 - stand/efi/boot1/Makefile | 4 +- stand/efi/boot1/zfs_module.c | 2 +- stand/efi/loader/Makefile | 2 + stand/efi/loader/main.c | 8 +- stand/forth/support.4th | 13 +- stand/i386/gptzfsboot/Makefile | 4 +- stand/i386/loader/Makefile | 5 + stand/i386/loader/main.c | 12 + stand/i386/zfsboot/Makefile | 4 +- stand/i386/zfsboot/zfsboot.c | 17 +- stand/libsa/zfs/Makefile.inc | 4 +- stand/libsa/zfs/libzfs.h | 52 +- stand/libsa/zfs/nvlist.c | 1582 ++++++++++++++++++----- stand/libsa/zfs/zfs.c | 878 ++++++++++--- stand/libsa/zfs/zfsimpl.c | 370 +++++- stand/loader.mk | 2 +- stand/lua/config.lua | 11 +- stand/userboot/test/test.c | 25 +- stand/userboot/userboot.h | 6 + stand/userboot/userboot/Makefile | 5 +- stand/userboot/userboot/main.c | 33 +- stand/userboot/userboot/userboot_disk.c | 18 +- sys/cddl/boot/zfs/zfsimpl.h | 39 +- tools/tools/zfsboottest/zfsboottest.c | 2 +- 52 files changed, 3347 insertions(+), 780 deletions(-) create mode 100644 cddl/lib/libzfsbootenv/Makefile create mode 100644 stand/common/nvstore.c diff --git a/Makefile.inc1 b/Makefile.inc1 index b7a1c0476d77..20019573ea26 100644 --- 
a/Makefile.inc1 +++ b/Makefile.inc1 @@ -2835,7 +2835,7 @@ _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_cddl_lib_libtpool} \ ${_cddl_lib_libzfs_core} ${_cddl_lib_libzfs} \ ${_cddl_lib_libzutil} \ - ${_cddl_lib_libctf} \ + ${_cddl_lib_libctf} ${_cddl_lib_libzfsbootenv} \ lib/libufs \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_secure_lib_libssl} \ @@ -2915,6 +2915,7 @@ _cddl_lib_libtpool= cddl/lib/libtpool _cddl_lib_libzutil= cddl/lib/libzutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libzfs= cddl/lib/libzfs +_cddl_lib_libzfsbootenv= cddl/lib/libzfsbootenv cddl/lib/libtpool__L: cddl/lib/libspl__L @@ -2928,7 +2929,8 @@ cddl/lib/libzfs__L: cddl/lib/libuutil__L cddl/lib/libavl__L lib/libgeom__L cddl/lib/libzfs__L: cddl/lib/libnvpair__L cddl/lib/libzutil__L cddl/lib/libzfs__L: secure/lib/libcrypto__L -lib/libbe__L: cddl/lib/libzfs__L +cddl/lib/libzfsbootenv__L: cddl/lib/libzfs__L +lib/libbe__L: cddl/lib/libzfs__L cddl/lib/libzfsbootenv__L .endif _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib diff --git a/cddl/lib/Makefile b/cddl/lib/Makefile index 399e314e76b1..38ab0358dde6 100644 --- a/cddl/lib/Makefile +++ b/cddl/lib/Makefile @@ -15,6 +15,7 @@ SUBDIR= drti \ libuutil \ ${_libzfs_core} \ ${_libzfs} \ + ${_libzfsbootenv} \ ${_libzpool} \ ${_libzutil} @@ -26,6 +27,7 @@ _libicp= libicp _libicp_rescue= libicp_rescue _libzfs= libzfs _libzutil= libzutil +_libzfsbootenv= libzfsbootenv .if ${MK_LIBTHR} != "no" _libzpool= libzpool _libtpool= libtpool @@ -40,6 +42,7 @@ SUBDIR_DEPEND_libzfs_core= libnvpair SUBDIR_DEPEND_libzfs= libavl libnvpair libumem libuutil libzfs_core libzutil SUBDIR_DEPEND_libzpool= libavl libnvpair libumem libicp SUBDIR_DEPEND_libzutil= libavl libtpool +SUBDIR_DEPEND_libzfsbootenv= libzfs libnvpair SUBDIR_PARALLEL= diff --git a/cddl/lib/libzfsbootenv/Makefile b/cddl/lib/libzfsbootenv/Makefile new file mode 100644 index 000000000000..f84718a10de7 --- /dev/null +++ 
b/cddl/lib/libzfsbootenv/Makefile @@ -0,0 +1,33 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/contrib/openzfs/lib/libzfsbootenv +.PATH: ${SRCTOP}/sys/contrib/openzfs/include + +PACKAGE= runtime +LIB= zfsbootenv +SHLIB_MAJOR= 1 + +LIBADD= zfs +LIBADD+= nvpair + +INCS= libzfsbootenv.h +USER_C= \ + lzbe_device.c \ + lzbe_util.c \ + lzbe_pair.c + +SRCS= $(USER_C) + +CSTD= c99 +CFLAGS+= -DIN_BASE +CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include +CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/ +CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/lib/libspl/include/os/freebsd +CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include +CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/module/icp/include +CFLAGS+= -include ${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/spl/sys/ccompile.h +CFLAGS+= -DHAVE_ISSETUGID +CFLAGS+= -include ${SRCTOP}/sys/modules/zfs/zfs_config.h +CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include/os/freebsd/zfs + +.include diff --git a/cddl/lib/libzpool/Makefile b/cddl/lib/libzpool/Makefile index 2eefb3bc47bb..643bafed8b74 100644 --- a/cddl/lib/libzpool/Makefile +++ b/cddl/lib/libzpool/Makefile @@ -228,6 +228,7 @@ CFLAGS+= \ -I${ZFSTOP}/lib/libspl/include \ -I${ZFSTOP}/lib/libspl/include/os/freebsd \ -I${SRCTOP}/sys \ + -I${ZFSTOP}/include/os/freebsd/zfs \ -I${SRCTOP}/cddl/compat/opensolaris/include \ -I${ZFSTOP}/module/icp/include \ -include ${ZFSTOP}/include/os/freebsd/spl/sys/ccompile.h \ diff --git a/lib/libbe/Makefile b/lib/libbe/Makefile index 2ef809d97ea2..d3d5beee293e 100644 --- a/lib/libbe/Makefile +++ b/lib/libbe/Makefile @@ -13,7 +13,9 @@ INCS= be.h MAN= libbe.3 LIBADD+= zfs -LIBADD+= nvpair spl +LIBADD+= nvpair +LIBADD+= spl +LIBADD+= zfsbootenv CFLAGS+= -DIN_BASE -DHAVE_RPC_TYPES CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include diff --git a/lib/libbe/be.c b/lib/libbe/be.c index 72f937453b08..644cf90a6fd7 100644 --- a/lib/libbe/be.c +++ b/lib/libbe/be.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "be.h" 
#include "be_impl.h" @@ -1221,43 +1222,20 @@ be_add_child(libbe_handle_t *lbh, const char *child_path, bool cp_if_exists) } #endif /* SOON */ -static int -be_set_nextboot(libbe_handle_t *lbh, nvlist_t *config, uint64_t pool_guid, - const char *zfsdev) -{ - nvlist_t **child; - uint64_t vdev_guid; - int c, children; - - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, - &children) == 0) { - for (c = 0; c < children; ++c) - if (be_set_nextboot(lbh, child[c], pool_guid, zfsdev) != 0) - return (1); - return (0); - } - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, - &vdev_guid) != 0) { - return (1); - } - - if (zpool_nextboot(lbh->lzh, pool_guid, vdev_guid, zfsdev) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - - return (0); -} - /* - * Deactivate old BE dataset; currently just sets canmount=noauto + * Deactivate old BE dataset; currently just sets canmount=noauto or + * resets boot once configuration. */ -static int -be_deactivate(libbe_handle_t *lbh, const char *ds) +int +be_deactivate(libbe_handle_t *lbh, const char *ds, bool temporary) { zfs_handle_t *zfs; + if (temporary) { + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), lzbe_add, NULL)); + } + if ((zfs = zfs_open(lbh->lzh, ds, ZFS_TYPE_DATASET)) == NULL) return (1); if (zfs_prop_set(zfs, "canmount", "noauto") != 0) @@ -1270,10 +1248,8 @@ int be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) { char be_path[BE_MAXPATHLEN]; - char buf[BE_MAXPATHLEN]; - nvlist_t *config, *dsprops, *vdevs; + nvlist_t *dsprops; char *origin; - uint64_t pool_guid; zfs_handle_t *zhp; int err; @@ -1284,27 +1260,10 @@ be_activate(libbe_handle_t *lbh, const char *bootenv, bool temporary) return (set_error(lbh, err)); if (temporary) { - config = zpool_get_config(lbh->active_phandle, NULL); - if (config == NULL) - /* config should be fetchable... 
*/ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, - &pool_guid) != 0) - /* Similarly, it shouldn't be possible */ - return (set_error(lbh, BE_ERR_UNKNOWN)); - - /* Expected format according to zfsbootcfg(8) man */ - snprintf(buf, sizeof(buf), "zfs:%s:", be_path); - - /* We have no config tree */ - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &vdevs) != 0) - return (set_error(lbh, BE_ERR_NOPOOL)); - - return (be_set_nextboot(lbh, vdevs, pool_guid, buf)); + return (lzbe_set_boot_device( + zpool_get_name(lbh->active_phandle), lzbe_add, be_path)); } else { - if (be_deactivate(lbh, lbh->bootfs) != 0) + if (be_deactivate(lbh, lbh->bootfs, false) != 0) return (-1); /* Obtain bootenv zpool */ diff --git a/lib/libbe/be.h b/lib/libbe/be.h index 3a99c177e4bd..960b1adf2457 100644 --- a/lib/libbe/be.h +++ b/lib/libbe/be.h @@ -81,6 +81,7 @@ int be_prop_list_alloc(nvlist_t **be_list); void be_prop_list_free(nvlist_t *be_list); int be_activate(libbe_handle_t *, const char *, bool); +int be_deactivate(libbe_handle_t *, const char *, bool); bool be_is_auto_snapshot_name(libbe_handle_t *, const char *); diff --git a/lib/libbe/be_impl.h b/lib/libbe/be_impl.h index 98fd3b29f932..0b0f0db3cb5c 100644 --- a/lib/libbe/be_impl.h +++ b/lib/libbe/be_impl.h @@ -63,6 +63,7 @@ typedef struct prop_data { nvlist_t *list; libbe_handle_t *lbh; bool single_object; /* list will contain props directly */ + char *bootonce; } prop_data_t; int prop_list_builder_cb(zfs_handle_t *, void *); diff --git a/lib/libbe/be_info.c b/lib/libbe/be_info.c index 7f1b58a071ca..745f4f9be23b 100644 --- a/lib/libbe/be_info.c +++ b/lib/libbe/be_info.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include "be.h" #include "be_impl.h" @@ -108,6 +109,7 @@ be_get_bootenv_props(libbe_handle_t *lbh, nvlist_t *dsnvl) data.lbh = lbh; data.list = dsnvl; data.single_object = false; + data.bootonce = NULL; return (be_proplist_update(&data)); } @@ 
-121,6 +123,7 @@ be_get_dataset_props(libbe_handle_t *lbh, const char *name, nvlist_t *props) data.lbh = lbh; data.list = props; data.single_object = true; + data.bootonce = NULL; if ((snap_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT)) == NULL) return (BE_ERR_ZFSOPEN); @@ -140,6 +143,7 @@ be_get_dataset_snapshots(libbe_handle_t *lbh, const char *name, nvlist_t *props) data.lbh = lbh; data.list = props; data.single_object = false; + data.bootonce = NULL; if ((ds_hdl = zfs_open(lbh->lzh, name, ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); @@ -179,6 +183,10 @@ prop_list_builder_cb(zfs_handle_t *zfs_hdl, void *data_p) dataset = zfs_get_name(zfs_hdl); nvlist_add_string(props, "dataset", dataset); + if (data->bootonce != NULL && + strcmp(dataset, data->bootonce) == 0) + nvlist_add_boolean_value(props, "bootonce", true); + name = strrchr(dataset, '/') + 1; nvlist_add_string(props, "name", name); @@ -246,6 +254,9 @@ be_proplist_update(prop_data_t *data) ZFS_TYPE_FILESYSTEM)) == NULL) return (BE_ERR_ZFSOPEN); + (void) lzbe_get_boot_device(zpool_get_name(data->lbh->active_phandle), + &data->bootonce); + /* XXX TODO: some error checking here */ zfs_iter_filesystems(root_hdl, prop_list_builder_cb, data); diff --git a/lib/libbe/libbe.3 b/lib/libbe/libbe.3 index 0a93b30dbcbc..b8bbc29cf1d3 100644 --- a/lib/libbe/libbe.3 +++ b/lib/libbe/libbe.3 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 16, 2019 +.Dd July 22, 2020 .Dt LIBBE 3 .Os .Sh NAME @@ -78,6 +78,10 @@ .Pp .Ft int .Fn be_activate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp +.Ft int +.Fn be_deactivate "libbe_handle_t *hdl" "const char *be_name" "bool temporary" +.Pp .Ft int .Fn be_destroy "libbe_handle_t *hdl" "const char *be_name" "int options" .Pp @@ -270,8 +274,24 @@ If the .Fa temporary flag is set, then it will be active for the next boot only, as done by .Xr zfsbootcfg 8 . 
-Next boot functionality is currently only available when booting in x86 BIOS -mode. +.Pp +The +.Fn be_deactivate +function deactivates a boot environment. +If the +.Fa temporary +flag is set, then it will cause removal of boot once configuration, set by +.Fn be_activate +function or by +.Xr zfsbootcfg 8 . +If the +.Fa temporary +flag is not set, +.Fn be_deactivate +function will set zfs +.Dv canmount +property to +.Dv noauto . .Pp The .Fn be_destroy diff --git a/libexec/rc/rc.conf b/libexec/rc/rc.conf index 1d01b551dc6a..4d9406cf73c5 100644 --- a/libexec/rc/rc.conf +++ b/libexec/rc/rc.conf @@ -64,6 +64,7 @@ rc_conf_files="/etc/rc.conf /etc/rc.conf.local" # ZFS support zfs_enable="NO" # Set to YES to automatically mount ZFS file systems +zfs_bootonce_activate="NO" # Set YES to make successful bootonce BE permanent # ZFSD support zfsd_enable="NO" # Set to YES to automatically start the ZFS fault diff --git a/libexec/rc/rc.d/zfsbe b/libexec/rc/rc.d/zfsbe index 3c852017aa41..e0a30c5a16b4 100755 --- a/libexec/rc/rc.d/zfsbe +++ b/libexec/rc/rc.d/zfsbe @@ -48,6 +48,21 @@ mount_subordinate() done } +activate_bootonce() +{ + local _dev + local _bootonce + local _be + + _dev=$1 + _be=${_dev##*/} + + _bootonce="`kenv -q zfs-bootonce`" + if [ "$_bootonce" = "zfs:${_dev}:" ] ; then + bectl activate $_be + fi +} + be_start() { if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then @@ -57,6 +72,9 @@ be_start() [ $_mp = "/" ] || continue if [ $_type = "zfs" ] ; then mount_subordinate $_dev + if checkyesno zfs_bootonce_activate; then + activate_bootonce $_dev + fi fi break done diff --git a/rescue/rescue/Makefile b/rescue/rescue/Makefile index b468a7827914..f1d563d5c8fd 100644 --- a/rescue/rescue/Makefile +++ b/rescue/rescue/Makefile @@ -129,7 +129,7 @@ CRUNCH_PROGS_usr.sbin+= zdb CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma .if ${MK_ZFS} != "no" CRUNCH_LIBS+= -lavl -lzpool -lzfs_core -lzfs -lnvpair -lpthread -luutil -lumem -CRUNCH_LIBS+= -lbe -lzutil 
-ltpool -lspl -licp_rescue +CRUNCH_LIBS+= -lbe -lzfsbootenv -lzutil -ltpool -lspl -licp_rescue .else # liblzma needs pthread CRUNCH_LIBS+= -lpthread diff --git a/sbin/bectl/Makefile b/sbin/bectl/Makefile index e261c85e1e10..6fa84f48661d 100644 --- a/sbin/bectl/Makefile +++ b/sbin/bectl/Makefile @@ -12,6 +12,7 @@ LIBADD+= be \ nvpair \ spl \ util \ + zfsbootenv CFLAGS+= -DIN_BASE CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include diff --git a/sbin/bectl/bectl.8 b/sbin/bectl/bectl.8 index e45bc6b99551..0638a0c10ecb 100644 --- a/sbin/bectl/bectl.8 +++ b/sbin/bectl/bectl.8 @@ -17,7 +17,7 @@ .\" .\" $FreeBSD$ .\" -.Dd August 17, 2020 +.Dd August 25, 2020 .Dt BECTL 8 .Os .Sh NAME @@ -26,7 +26,7 @@ .Sh SYNOPSIS .Nm .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Nm .Cm check @@ -95,7 +95,7 @@ The following commands are supported by .Bl -tag -width activate .It Xo .Cm activate -.Op Fl t +.Op Fl t | Fl T .Ar beName .Xc Activate the given @@ -104,6 +104,13 @@ as the default boot filesystem. If the .Fl t flag is given, this takes effect only for the next boot. +Flag +.Fl T +removes temporary boot once configuration. +Without temporary configuration, the next boot will use zfs dataset specified +in boot pool +.Ar bootfs +property. .It Xo .Cm check .Xc @@ -260,8 +267,10 @@ field indicates whether the boot environment is active now .Pq Em \&N ; active on reboot .Pq Em \&R ; -or both -.Pq Em \&NR . +is used on next boot once +.Pq Em \&T ; +or combination of +.Pq Em \&NRT . 
.Pp .Bl -tag -width indent .It Fl a diff --git a/sbin/bectl/bectl.c b/sbin/bectl/bectl.c index 31ece42c6ff2..8a319a0d67f6 100644 --- a/sbin/bectl/bectl.c +++ b/sbin/bectl/bectl.c @@ -74,6 +74,7 @@ usage(bool explicit) "\tbectl add (path)*\n" #endif "\tbectl activate [-t] beName\n" + "\tbectl activate [-T]\n" "\tbectl check\n" "\tbectl create [-r] [-e {nonActiveBe | beName@snapshot}] beName\n" "\tbectl create [-r] beName@snapshot\n" @@ -141,14 +142,22 @@ static int bectl_cmd_activate(int argc, char *argv[]) { int err, opt; - bool temp; + bool temp, reset; temp = false; - while ((opt = getopt(argc, argv, "t")) != -1) { + reset = false; + while ((opt = getopt(argc, argv, "tT")) != -1) { switch (opt) { case 't': + if (reset) + return (usage(false)); temp = true; break; + case 'T': + if (temp) + return (usage(false)); + reset = true; + break; default: fprintf(stderr, "bectl activate: unknown option '-%c'\n", optopt); @@ -159,11 +168,18 @@ bectl_cmd_activate(int argc, char *argv[]) argc -= optind; argv += optind; - if (argc != 1) { + if (argc != 1 && (!reset || argc != 0)) { fprintf(stderr, "bectl activate: wrong number of arguments\n"); return (usage(false)); } + if (reset) { + if ((err = be_deactivate(be, NULL, reset)) == 0) + printf("Temporary activation removed\n"); + else + printf("Failed to remove temporary activation\n"); + return (err); + } /* activate logic goes here */ if ((err = be_activate(be, argv[0], temp)) != 0) diff --git a/sbin/bectl/bectl_list.c b/sbin/bectl/bectl_list.c index 757a773e6d32..e43c3000d8f2 100644 --- a/sbin/bectl/bectl_list.c +++ b/sbin/bectl/bectl_list.c @@ -182,7 +182,7 @@ print_info(const char *name, nvlist_t *dsprops, struct printc *pc) const char *oname; char *dsname, *propstr; int active_colsz; - boolean_t active_now, active_reboot; + boolean_t active_now, active_reboot, bootonce; dsname = NULL; originprops = NULL; @@ -230,6 +230,11 @@ print_info(const char *name, nvlist_t *dsprops, struct printc *pc) printf("R"); active_colsz--; } + 
if (nvlist_lookup_boolean_value(dsprops, "bootonce", + &bootonce) == 0 && bootonce) { + printf("T"); + active_colsz--; + } if (active_colsz == pc->active_colsz_def) { printf("-"); active_colsz--; diff --git a/sbin/reboot/nextboot.8 b/sbin/reboot/nextboot.8 index 8b72b9ad10de..d1e8cc38788e 100644 --- a/sbin/reboot/nextboot.8 +++ b/sbin/reboot/nextboot.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 9, 2016 +.Dd September 19, 2020 .Dt NEXTBOOT 8 .Os .Sh NAME @@ -136,8 +136,3 @@ It is also my first attempt to write in Forth. Finally, it does some evil things like writing to the file system before it has been checked. If it scrambles your file system, do not blame me. -.Pp -.Xr loader 8 -is only able to read ZFS, not write to it. -.Pa nextboot.conf -will NOT be reset in case of a kernel boot failure. diff --git a/sbin/reboot/nextboot.sh b/sbin/reboot/nextboot.sh index e975215f695a..c2d1076337b9 100644 --- a/sbin/reboot/nextboot.sh +++ b/sbin/reboot/nextboot.sh @@ -33,6 +33,7 @@ delete="NO" kenv= force="NO" nextboot_file="/boot/nextboot.conf" +zfs= add_kenv() { @@ -106,26 +107,26 @@ if [ -n "${kernel}" -a ${force} = "NO" -a ! 
-d /boot/${kernel} ]; then exit 1 fi -df -Tn "/boot/" 2>/dev/null | while read _fs _type _other ; do +zfs=$(df -Tn "/boot/" 2>/dev/null | while read _fs _type _other ; do [ "zfs" = "${_type}" ] || continue - cat 1>&2 <<-EOF - WARNING: loader(8) has only R/O support for ZFS - nextboot.conf will NOT be reset in case of kernel boot failure - EOF -done + echo "${_fs%/ROOT/*}" +done) set -e nextboot_tmp=$(mktemp $(dirname ${nextboot_file})/nextboot.XXXXXX) -if [ ${append} = "YES" -a -f ${nextboot_file} ]; then - cp -f ${nextboot_file} ${nextboot_tmp} -fi - +if [ -n ${zfs} ]; then + zfsbootcfg -z ${zfs} -n freebsd:nvstore -k nextboot_enable -v YES + cat >> ${nextboot_tmp} << EOF +$kenv +EOF +else cat >> ${nextboot_tmp} << EOF nextboot_enable="YES" $kenv EOF +fi fsync ${nextboot_tmp} diff --git a/sbin/zfsbootcfg/Makefile b/sbin/zfsbootcfg/Makefile index 8bc73ffa1e06..566ba2261705 100644 --- a/sbin/zfsbootcfg/Makefile +++ b/sbin/zfsbootcfg/Makefile @@ -2,14 +2,9 @@ # $FreeBSD$ PROG= zfsbootcfg -WARNS?= 2 MAN= zfsbootcfg.8 -LIBADD+=zfs -LIBADD+=nvpair -LIBADD+=umem -LIBADD+=uutil -LIBADD+=geom +LIBADD+=zfsbootenv CFLAGS+= -DIN_BASE CFLAGS+= -I${SRCTOP}/sys/contrib/openzfs/include diff --git a/sbin/zfsbootcfg/zfsbootcfg.8 b/sbin/zfsbootcfg/zfsbootcfg.8 index 8b9b36742db7..797e5d4edaaa 100644 --- a/sbin/zfsbootcfg/zfsbootcfg.8 +++ b/sbin/zfsbootcfg/zfsbootcfg.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 24, 2017 +.Dd July 22, 2020 .Dt ZFSBOOTCFG 8 .Os .Sh NAME @@ -33,39 +33,88 @@ .Sh SYNOPSIS .Nm .Ao Ar options Ac +.Nm +.Op Fl n Ar name +.Op Fl k Ar key +.Op Fl p +.Op Fl t Ar type +.Op Fl v Ar value +.Op Fl z Ar pool +.Nm .Sh DESCRIPTION .Nm is used to set .Xr boot.config 5 Ns -style options to be used by -.Xr zfsboot 8 -or +.Xr zfsboot 8 , .Xr gptzfsboot 8 +or +.Xr loader 8 the next time the machine is booted. Once .Xr zfsboot 8 or .Xr gptzfsboot 8 +or +.Xr loader 8 reads the information, it is deleted. 
If booting fails, the machine automatically reverts to the previous boot configuration. -The information is stored in a special reserved area of a ZFS pool. -.Xr zfsboot 8 -or -.Xr gptzfsboot 8 -read the boot option information from the first disk found in the first -ZFS pool found. +The information is stored in a special boot environment area of a ZFS pool. +.Pp +If used without arguments, +.Nm +will output the current boot configuration, if set. +.Pp +The following options are supported by +.Nm : +.Bl -tag -width indent +.It Fl k Ar key +Define key for +.Ao key , value Ac +pair. +.It Fl n Ar name +Update nvlist +.Ar name . +.It Fl p +Print all information stored in ZFS pool bootenv area. +.It Fl t Ar type +Set type of +.Ar value +used in +.Ao key , value Ac +pair. +Currently supported types are: +.Bl -tag -width indent -compact +.It Ar DATA_TYPE_BYTE +.It Ar DATA_TYPE_INT8 +.It Ar DATA_TYPE_UINT8 +.It Ar DATA_TYPE_INT16 +.It Ar DATA_TYPE_UINT16 +.It Ar DATA_TYPE_INT32 +.It Ar DATA_TYPE_UINT32 +.It Ar DATA_TYPE_INT64 +.It Ar DATA_TYPE_UINT64 +.It Ar DATA_TYPE_BOOLEAN_VALUE +.It Ar DATA_TYPE_STRING +.El +.Pp +If not specified, the default is +.Ar DATA_TYPE_STRING . +.It Fl v Ar value +Define value for +.Ao key , value Ac +pair. +.It Fl z Ar pool +Operate on +.Ar pool . +.El .Sh ENVIRONMENT -.Bl -tag -width vfs.zfs.boot.primary_pool -compact -.It Ev vfs.zfs.boot.primary_pool +.Bl -tag -width vfs.root.mountfrom -compact +.It Ev vfs.root.mountfrom The .Xr kenv 1 variable that identifies a pool for which the options are written. -.It Ev vfs.zfs.boot.primary_vdev -The -.Xr kenv 1 -variable that identifies a disk within the pool where the options -are written. 
.El .Sh EXAMPLES Try to boot to a new @@ -81,7 +130,9 @@ To clear the boot options: .Dl "zfsbootcfg """" .Sh SEE ALSO .Xr boot.config 5 , +.Xr bectl 8 , .Xr gptzfsboot 8 , +.Xr loader 8 , .Xr zfsboot 8 .Sh HISTORY .Nm @@ -90,23 +141,3 @@ appeared in .Sh AUTHORS This manual page was written by .An Andriy Gapon Aq Mt avg@FreeBSD.org . -.Sh CAVEATS -At the moment, -.Nm -uses the -.Ev vfs.zfs.boot.primary_pool -and -.Ev vfs.zfs.boot.primary_vdev -.Xr kenv 1 -variables to determine a ZFS pool and a disk in it where the options -are to be stored. -The variables are set by the ZFS boot chain, so there is an assumption -that the same boot disk is going to be used for the next reboot. -There is no -.Nm -option to specify a different pool or a different disk. -.Pp -.Nm -should be extended to install new -.Xr zfsboot 8 -blocks in a ZFS pool. diff --git a/sbin/zfsbootcfg/zfsbootcfg.c b/sbin/zfsbootcfg/zfsbootcfg.c index 4ff1a35e2ec7..f4b2eeabcc3b 100644 --- a/sbin/zfsbootcfg/zfsbootcfg.c +++ b/sbin/zfsbootcfg/zfsbootcfg.c @@ -32,115 +32,257 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include +#include -#include +#include -/* Keep in sync with zfsboot.c. 
*/ -#define MAX_COMMAND_LEN 512 +#ifndef ZFS_MAXNAMELEN +#define ZFS_MAXNAMELEN 256 +#endif -int -install_bootonce(libzfs_handle_t *hdl, uint64_t pool_guid, nvlist_t *nv, - const char * const data) +static int +add_pair(const char *name, const char *nvlist, const char *key, + const char *type, const char *value) { - nvlist_t **child; - uint_t children = 0; - uint64_t guid; + void *data, *nv; + size_t size; int rv; + char *end; - (void) nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, - &children); - - for (int c = 0; c < children; c++) { - rv = install_bootonce(hdl, pool_guid, child[c], data); - } - - if (children > 0) + rv = lzbe_nvlist_get(name, nvlist, &nv); + if (rv != 0) return (rv); - if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0) { - perror("can't get vdev guid"); - return (1); + data = NULL; + rv = EINVAL; + if (strcmp(type, "DATA_TYPE_STRING") == 0) { + data = __DECONST(void *, value); + size = strlen(data) + 1; + rv = lzbe_add_pair(nv, key, type, data, size); + } else if (strcmp(type, "DATA_TYPE_UINT64") == 0) { + uint64_t v; + + v = strtoull(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_INT64") == 0) { + int64_t v; + + v = strtoll(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_UINT32") == 0) { + uint32_t v; + + v = strtoul(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_INT32") == 0) { + int32_t v; + + v = strtol(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_UINT16") == 0) { + uint16_t v; + + v = strtoul(value, &end, 0); + if (errno != 0 || *end != 
'\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_INT16") == 0) { + int16_t v; + + v = strtol(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_UINT8") == 0) { + uint8_t v; + + v = strtoul(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_INT8") == 0) { + int8_t v; + + v = strtol(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_BYTE") == 0) { + uint8_t v; + + v = strtoul(value, &end, 0); + if (errno != 0 || *end != '\0') + goto done; + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); + } else if (strcmp(type, "DATA_TYPE_BOOLEAN_VALUE") == 0) { + int32_t v; + + v = strtol(value, &end, 0); + if (errno != 0 || *end != '\0') { + if (strcasecmp(value, "YES") == 0) + v = 1; + else if (strcasecmp(value, "NO") == 0) + v = 0; + if (strcasecmp(value, "true") == 0) + v = 1; + else if (strcasecmp(value, "false") == 0) + v = 0; + else goto done; + } + size = sizeof (v); + rv = lzbe_add_pair(nv, key, type, &v, size); } - if (zpool_nextboot(hdl, pool_guid, guid, data) != 0) { - perror("ZFS_IOC_NEXTBOOT failed"); - return (1); - } - return (0); + + if (rv == 0) + rv = lzbe_nvlist_set(name, nvlist, nv); + +done: + lzbe_nvlist_free(nv); + return (rv); } -int main(int argc, const char * const *argv) +static int +delete_pair(const char *name, const char *nvlist, const char *key) { - char buf[32], *name; - libzfs_handle_t *hdl; - zpool_handle_t *zphdl; - uint64_t pool_guid; - nvlist_t *nv, *config; + void *nv; int rv; - int len; - if (argc != 2) { + rv = lzbe_nvlist_get(name, nvlist, &nv); + if (rv == 0) { + rv = lzbe_remove_pair(nv, key); + } + 
if (rv == 0) + rv = lzbe_nvlist_set(name, nvlist, nv); + + lzbe_nvlist_free(nv); + return (rv); +} + +/* + * Usage: zfsbootcfg [-z pool] [-d key] [-k key -t type -v value] [-p] + * zfsbootcfg [-z pool] -n nvlist [-d key] [-k key -t type -v value] [-p] + * + * if nvlist is set, we will update nvlist in bootenv. + * if nvlist is not set, we update pairs in bootenv. + */ +int +main(int argc, char * const *argv) +{ + char buf[ZFS_MAXNAMELEN], *name; + const char *key, *value, *type, *nvlist; + int rv; + bool print, delete; + + nvlist = NULL; + name = NULL; + key = NULL; + type = NULL; + value = NULL; + print = delete = false; + while ((rv = getopt(argc, argv, "d:k:n:pt:v:z:")) != -1) { + switch (rv) { + case 'd': + delete = true; + key = optarg; + break; + case 'k': + key = optarg; + break; + case 'n': + nvlist = optarg; + break; + case 'p': + print = true; + break; + case 't': + type = optarg; + break; + case 'v': + value = optarg; + break; + case 'z': + name = optarg; + break; + } + } + + argc -= optind; + argv += optind; + + if (argc == 1) + value = argv[0]; + + if (argc > 1) { fprintf(stderr, "usage: zfsbootcfg \n"); return (1); } - len = strlen(argv[1]); - if (len >= MAX_COMMAND_LEN) { - fprintf(stderr, "options string is too long\n"); - return (1); + if (name == NULL) { + rv = kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)); + if (rv <= 0) { + perror("can't get vfs.root.mountfrom"); + return (1); + } + + if (strncmp(buf, "zfs:", 4) == 0) { + name = strchr(buf + 4, '/'); + if (name != NULL) + *name = '\0'; + name = buf + 4; + } else { + perror("not a zfs root"); + return (1); + } } - if (kenv(KENV_GET, "vfs.root.mountfrom", buf, sizeof(buf)) <= 0) { - perror("can't get vfs.root.mountfrom"); - return (1); + rv = 0; + if (key != NULL || value != NULL) { + if (type == NULL) + type = "DATA_TYPE_STRING"; + + if (delete) + rv = delete_pair(name, nvlist, key); + else if (key == NULL || strcmp(key, "command") == 0) + rv = lzbe_set_boot_device(name, lzbe_add, 
value); + else + rv = add_pair(name, nvlist, key, type, value); + + if (rv == 0) + printf("zfs bootenv is successfully written\n"); + else + printf("error: %d\n", rv); + } else if (!print) { + char *ptr; + + if (lzbe_get_boot_device(name, &ptr) == 0) { + printf("zfs:%s:\n", ptr); + free(ptr); + } } - if (strncmp(buf, "zfs:", 4) == 0) { - name = strchr(buf + 4, '/'); - if (name != NULL) - *name = '\0'; - name = buf + 4; - } else { - perror("not a zfs root"); - return (1); - } - - if ((hdl = libzfs_init()) == NULL) { - (void) fprintf(stderr, "internal error: failed to " - "initialize ZFS library\n"); - return (1); + if (print) { + rv = lzbe_bootenv_print(name, nvlist, stdout); } - zphdl = zpool_open(hdl, name); - if (zphdl == NULL) { - perror("can't open pool"); - libzfs_fini(hdl); - return (1); - } - - pool_guid = zpool_get_prop_int(zphdl, ZPOOL_PROP_GUID, NULL); - - config = zpool_get_config(zphdl, NULL); - if (config == NULL) { - perror("can't get pool config"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); - } - - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) != 0) { - perror("failed to get vdev tree"); - zpool_close(zphdl); - libzfs_fini(hdl); - return (1); - } - - rv = install_bootonce(hdl, pool_guid, nv, argv[1]); - - zpool_close(zphdl); - libzfs_fini(hdl); - if (rv == 0) - printf("zfs next boot options are successfully written\n"); return (rv); } diff --git a/share/man/man5/rc.conf.5 b/share/man/man5/rc.conf.5 index 38bb2f953b07..b824426a91d2 100644 --- a/share/man/man5/rc.conf.5 +++ b/share/man/man5/rc.conf.5 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 11, 2020 +.Dd Sep 21, 2020 .Dt RC.CONF 5 .Os .Sh NAME @@ -4574,6 +4574,12 @@ If set to removes empty .Dq Li rc.conf.d files. +.It Va zfs_bootonce_activate +.Pq Vt bool +If set to +.Dq Li YES , +and a boot environment marked bootonce is successfully booted, +it will be made permanently active. 
.El .Sh FILES .Bl -tag -width ".Pa /etc/defaults/rc.conf" -compact diff --git a/share/mk/bsd.libnames.mk b/share/mk/bsd.libnames.mk index 4e6a387aa39a..ad7d549cad4c 100644 --- a/share/mk/bsd.libnames.mk +++ b/share/mk/bsd.libnames.mk @@ -169,6 +169,7 @@ LIBYPCLNT?= ${LIBDESTDIR}${LIBDIR_BASE}/libypclnt.a LIBZ?= ${LIBDESTDIR}${LIBDIR_BASE}/libz.a LIBZFS?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs.a LIBZFS_CORE?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfs_core.a +LIBZFSBOOTENV?= ${LIBDESTDIR}${LIBDIR_BASE}/libzfsbootenv.a LIBZPOOL?= ${LIBDESTDIR}${LIBDIR_BASE}/libzpool.a LIBZUTIL?= ${LIBDESTDIR}${LIBDIR_BASE}/libzutil.a diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index 8a7768a8318c..612cd6604025 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -202,6 +202,7 @@ _LIBRARIES= \ z \ zfs_core \ zfs \ + zfsbootenv \ zpool \ zutil @@ -385,10 +386,11 @@ _DP_tpool= spl _DP_uutil= avl spl _DP_zfs= md pthread umem util uutil m avl bsdxml crypto geom nvpair \ z zfs_core zutil +_DP_zfsbootenv= zfs nvpair _DP_zfs_core= nvpair _DP_zpool= md pthread z icp spl nvpair avl umem _DP_zutil= avl tpool -_DP_be= zfs spl nvpair +_DP_be= zfs spl nvpair zfsbootenv _DP_netmap= _DP_ifconfig= m @@ -600,6 +602,7 @@ LIBUMEMDIR= ${OBJTOP}/cddl/lib/libumem LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core +LIBZFSBOOTENVDIR= ${OBJTOP}/cddl/lib/libzfsbootenv LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool LIBZUTILDIR= ${OBJTOP}/cddl/lib/libzutil LIBTPOOLDIR= ${OBJTOP}/cddl/lib/libtpool diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index 9637faa825dd..b6845aaaae00 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -344,6 +344,37 @@ void delay(int delay); void dev_cleanup(void); +/* + * nvstore API. 
+ */ +typedef int (nvstore_getter_cb_t)(void *, const char *, void **); +typedef int (nvstore_setter_cb_t)(void *, int, const char *, + const void *, size_t); +typedef int (nvstore_setter_str_cb_t)(void *, const char *, const char *, + const char *); +typedef int (nvstore_unset_cb_t)(void *, const char *); +typedef int (nvstore_print_cb_t)(void *, void *); +typedef int (nvstore_iterate_cb_t)(void *, int (*)(void *, void *)); + +typedef struct nvs_callbacks { + nvstore_getter_cb_t *nvs_getter; + nvstore_setter_cb_t *nvs_setter; + nvstore_setter_str_cb_t *nvs_setter_str; + nvstore_unset_cb_t *nvs_unset; + nvstore_print_cb_t *nvs_print; + nvstore_iterate_cb_t *nvs_iterate; +} nvs_callbacks_t; + +int nvstore_init(const char *, nvs_callbacks_t *, void *); +int nvstore_fini(const char *); +void *nvstore_get_store(const char *); +int nvstore_print(void *); +int nvstore_get_var(void *, const char *, void **); +int nvstore_set_var(void *, int, const char *, void *, size_t); +int nvstore_set_var_from_string(void *, const char *, const char *, + const char *); +int nvstore_unset_var(void *, const char *); + #ifndef CTASSERT #define CTASSERT(x) _Static_assert(x, "compile-time assertion failed") #endif diff --git a/stand/common/nvstore.c b/stand/common/nvstore.c new file mode 100644 index 000000000000..c6d230584f9d --- /dev/null +++ b/stand/common/nvstore.c @@ -0,0 +1,310 @@ +/*- + * Copyright 2020 Toomas Soome + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Big Theory Statement. + * + * nvstore is an abstraction layer to implement data read/write to different + * types of non-volatile storage. + * + * User interfaces: + * Provide mapping via environment: setenv/unsetenv/putenv. Access via + * environment functions/commands is available once nvstore has + * attached the backend and the stored textual data is mapped to the + * environment. + * + * Provide command "nvstore" to create new data instances. + * + * API: TBD. + * nvstore_init(): attach new backend and create the environment mapping. + * nvstore_fini(): detach backend and unmap the related environment. + * + * Disk-based storage, such as a UFS file or the ZFS bootenv label area, is + * only accessible after the root file system is set. A root file system + * change will switch the backend storage. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "stand.h" + +typedef struct nvstore { + char *nvs_name; + void *nvs_data; + nvs_callbacks_t *nvs_cb; + STAILQ_ENTRY(nvstore) nvs_next; +} nvstore_t; + +typedef STAILQ_HEAD(store_list, nvstore) nvstore_list_t; + +nvstore_list_t stores = STAILQ_HEAD_INITIALIZER(stores); + +void * +nvstore_get_store(const char *name) +{ + nvstore_t *st; + + st = NULL; + + STAILQ_FOREACH(st, &stores, nvs_next) { + if (strcmp(name, st->nvs_name) == 0) + break; + } + + return (st); +} + +int +nvstore_init(const char *name, nvs_callbacks_t *cb, void *data) +{ + nvstore_t *st; + + st = nvstore_get_store(name); + if (st != NULL) + return (EEXIST); + + if ((st = malloc(sizeof (*st))) == NULL) + return (ENOMEM); + + if ((st->nvs_name = strdup(name)) == NULL) { + free(st); + return (ENOMEM); + } + + st->nvs_data = data; + st->nvs_cb = cb; + + STAILQ_INSERT_TAIL(&stores, st, nvs_next); + return (0); +} + +int +nvstore_fini(const char *name) +{ + nvstore_t *st; + + st = nvstore_get_store(name); + if (st == NULL) + return (ENOENT); + + STAILQ_REMOVE(&stores, st, nvstore, nvs_next); + + free(st->nvs_name); + free(st->nvs_data); + free(st); + return (0); +} + +int +nvstore_print(void *ptr) +{ + nvstore_t *st = ptr; + + return (st->nvs_cb->nvs_iterate(st->nvs_data, st->nvs_cb->nvs_print)); +} + +int +nvstore_get_var(void *ptr, const char *name, void **data) +{ + nvstore_t *st = ptr; + + return (st->nvs_cb->nvs_getter(st->nvs_data, name, data)); +} + +int +nvstore_set_var(void *ptr, int type, const char *name, + void *data, size_t size) +{ + nvstore_t *st = ptr; + + return (st->nvs_cb->nvs_setter(st->nvs_data, type, name, data, size)); +} + +int +nvstore_set_var_from_string(void *ptr, const char *type, const char *name, + const char *data) +{ + nvstore_t *st = ptr; + + return (st->nvs_cb->nvs_setter_str(st->nvs_data, type, name, data)); +} + +int +nvstore_unset_var(void *ptr, const char *name) +{ + nvstore_t *st = ptr; 
+ + return (st->nvs_cb->nvs_unset(st->nvs_data, name)); +} + +COMMAND_SET(nvstore, "nvstore", "manage non-volatile data", command_nvstore); + +static void +nvstore_usage(const char *me) +{ + printf("Usage:\t%s -l\n", me); + printf("\t%s store -l\n", me); + printf("\t%s store [-t type] key value\n", me); + printf("\t%s store -g key\n", me); + printf("\t%s store -d key\n", me); +} + +/* + * Usage: nvstore -l # list stores + * nvstore store -l # list data in store + * nvstore store [-t type] key value + * nvstore store -g key # get value + * nvstore store -d key # delete key + */ +static int +command_nvstore(int argc, char *argv[]) +{ + int c; + bool list, get, delete; + nvstore_t *st; + char *me, *name, *type; + + me = argv[0]; + optind = 1; + optreset = 1; + + list = false; + while ((c = getopt(argc, argv, "l")) != -1) { + switch (c) { + case 'l': + list = true; + break; + case '?': + default: + return (CMD_ERROR); + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) { + if (list) { + if (STAILQ_EMPTY(&stores)) { + printf("No configured nvstores\n"); + return (CMD_OK); + } + printf("List of configured nvstores:\n"); + STAILQ_FOREACH(st, &stores, nvs_next) { + printf("\t%s\n", st->nvs_name); + } + return (CMD_OK); + } + nvstore_usage(me); + return (CMD_ERROR); + } + + if (argc == 0 || (argc != 0 && list)) { + nvstore_usage(me); + return (CMD_ERROR); + } + + st = nvstore_get_store(argv[0]); + if (st == NULL) { + nvstore_usage(me); + return (CMD_ERROR); + } + + optind = 1; + optreset = 1; + name = NULL; + type = NULL; + get = delete = false; + + while ((c = getopt(argc, argv, "d:g:lt:")) != -1) { + switch (c) { + case 'd': + if (list || get) { + nvstore_usage(me); + return (CMD_ERROR); + } + name = optarg; + delete = true; + break; + case 'g': + if (delete || list) { + nvstore_usage(me); + return (CMD_ERROR); + } + name = optarg; + get = true; + break; + case 'l': + if (delete || get) { + nvstore_usage(me); + return (CMD_ERROR); + } + list = true; + break; 
+ case 't': + type = optarg; + break; + case '?': + default: + return (CMD_ERROR); + } + } + + argc -= optind; + argv += optind; + + if (list) { + (void) nvstore_print(st); + return (CMD_OK); + } + + if (delete && name != NULL) { + (void) nvstore_unset_var(st, name); + return (CMD_OK); + } + + if (get && name != NULL) { + char *ptr = NULL; + + if (nvstore_get_var(st, name, (void **)&ptr) == 0) + printf("%s = %s\n", name, ptr); + return (CMD_OK); + } + + if (argc == 2) { + c = nvstore_set_var_from_string(st, type, argv[0], argv[1]); + if (c != 0) { + printf("error: %s\n", strerror(c)); + return (CMD_ERROR); + } + return (CMD_OK); + } + + nvstore_usage(me); + return (CMD_OK); +} diff --git a/stand/defaults/loader.conf b/stand/defaults/loader.conf index 4f84281e9d2e..55f9983dfc8b 100644 --- a/stand/defaults/loader.conf +++ b/stand/defaults/loader.conf @@ -16,7 +16,6 @@ bootfile="kernel" # Kernel name (possibly absolute path) kernel_options="" # Flags to be passed to the kernel loader_conf_files="/boot/device.hints /boot/loader.conf /boot/loader.conf.local" nextboot_conf="/boot/nextboot.conf" -nextboot_enable="NO" verbose_loading="NO" # Set to YES for verbose loader output ### Splash screen configuration ############################ diff --git a/stand/efi/boot1/Makefile b/stand/efi/boot1/Makefile index 0cad67d21050..aa74cfeed71b 100644 --- a/stand/efi/boot1/Makefile +++ b/stand/efi/boot1/Makefile @@ -36,7 +36,9 @@ SRCS+= zfs_module.c CFLAGS.zfs_module.c+= -I${ZFSSRC} CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/boot/zfs CFLAGS.zfs_module.c+= -I${SYSDIR}/crypto/skein -CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common +CFLAGS.zfs_module.c+= -I${SYSDIR}/contrib/openzfs/include +CFLAGS.zfs_module.c+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl +CFLAGS.zfs_module.c+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs CFLAGS.zfs_module.c+= -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 CFLAGS+= -DEFI_ZFS_BOOT diff --git a/stand/efi/boot1/zfs_module.c 
b/stand/efi/boot1/zfs_module.c index 5550879a3227..583d6b1d101f 100644 --- a/stand/efi/boot1/zfs_module.c +++ b/stand/efi/boot1/zfs_module.c @@ -124,7 +124,7 @@ probe(dev_info_t *dev) } memcpy(tdev, dev, sizeof(*dev)); - if (vdev_probe(vdev_read, tdev, &spa) != 0) { + if (vdev_probe(vdev_read, NULL, tdev, &spa) != 0) { free(tdev); return (EFI_UNSUPPORTED); } diff --git a/stand/efi/loader/Makefile b/stand/efi/loader/Makefile index 73b1975ad3e3..a8fbaabfdf4e 100644 --- a/stand/efi/loader/Makefile +++ b/stand/efi/loader/Makefile @@ -27,6 +27,8 @@ SRCS= autoload.c \ CFLAGS+= -I${.CURDIR}/../loader .if ${MK_LOADER_ZFS} != "no" CFLAGS+= -I${ZFSSRC} +CFLAGS+= -I${SYSDIR}/contrib/openzfs/include +CFLAGS+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs CFLAGS+= -DEFI_ZFS_BOOT HAVE_ZFS= yes .endif diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c index e81a01de8b1e..2f1add17b318 100644 --- a/stand/efi/loader/main.c +++ b/stand/efi/loader/main.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -275,11 +276,14 @@ probe_zfs_currdev(uint64_t guid) if (rv) { buf = malloc(VDEV_PAD_SIZE); if (buf != NULL) { - if (zfs_nextboot(&currdev, buf, VDEV_PAD_SIZE) == 0) { - printf("zfs nextboot: %s\n", buf); + if (zfs_get_bootonce(&currdev, OS_BOOTONCE, buf, + VDEV_PAD_SIZE) == 0) { + printf("zfs bootonce: %s\n", buf); set_currdev(buf); + setenv("zfs-bootonce", buf, 1); } free(buf); + (void) zfs_attach_nvstore(&currdev); } } return (rv); diff --git a/stand/forth/support.4th b/stand/forth/support.4th index 64c71e12ec26..e6addfbdf7bb 100644 --- a/stand/forth/support.4th +++ b/stand/forth/support.4th @@ -1057,8 +1057,16 @@ string current_file_name_ref \ used to print the file name ; : include_nextboot_file ( -- ) - get_nextboot_conf_file - ['] peek_file catch if 2drop then + s" nextboot_enable" getenv dup -1 <> if + 2dup s' "YES"' compare >r + 2dup s' "yes"' compare >r + 2dup s" YES" compare >r + 2dup s" yes" compare 
r> r> r> and and and 0= to nextboot? + else + drop + get_nextboot_conf_file + ['] peek_file catch if 2drop then + then nextboot? if get_nextboot_conf_file current_file_name_ref strref @@ -1066,6 +1074,7 @@ string current_file_name_ref \ used to print the file name process_conf_errors ['] rewrite_nextboot_file catch if 2drop then then + s' "NO"' s" nextboot_enable" setenv ; \ Module loading functions diff --git a/stand/i386/gptzfsboot/Makefile b/stand/i386/gptzfsboot/Makefile index 6aa7464e1503..aa8b497f8f3a 100644 --- a/stand/i386/gptzfsboot/Makefile +++ b/stand/i386/gptzfsboot/Makefile @@ -32,7 +32,9 @@ CFLAGS+=-DBOOTPROG=\"gptzfsboot\" \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ - -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/contrib/openzfs/include \ + -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \ + -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/btx/lib \ -I${BOOTSRC}/i386/boot2 \ diff --git a/stand/i386/loader/Makefile b/stand/i386/loader/Makefile index 3c0357b7b7a7..74f39c078645 100644 --- a/stand/i386/loader/Makefile +++ b/stand/i386/loader/Makefile @@ -37,6 +37,11 @@ CFLAGS+= -DLOADER_FIREWIRE_SUPPORT LIBFIREWIRE= ${BOOTOBJ}/i386/libfirewire/libfirewire.a .endif +.if ${MK_LOADER_ZFS} == "yes" +CFLAGS.main.c+= -I${SYSDIR}/contrib/openzfs/include +CFLAGS.main.c+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs +.endif + .if exists(${.CURDIR}/help.i386) HELP_FILES= ${.CURDIR}/help.i386 .endif diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c index d7c23ba807ad..d6831e68e3b4 100644 --- a/stand/i386/loader/main.c +++ b/stand/i386/loader/main.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include "bootstrap.h" @@ -274,6 +275,7 @@ extract_currdev(void) struct i386_devdesc new_currdev; #ifdef LOADER_ZFS_SUPPORT char buf[20]; + char *bootonce; #endif int biosdev = -1; @@ -321,6 
+323,16 @@ extract_currdev(void) new_currdev.d_kind.zfs.root_guid = 0; } new_currdev.dd.d_dev = &zfs_dev; + + if ((bootonce = malloc(VDEV_PAD_SIZE)) != NULL) { + if (zfs_get_bootonce(&new_currdev, OS_BOOTONCE_USED, + bootonce, VDEV_PAD_SIZE) == 0) { + setenv("zfs-bootonce", bootonce, 1); + } + free(bootonce); + (void) zfs_attach_nvstore(&new_currdev); + } + #endif } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) { /* The passed-in boot device is bad */ diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile index 8c0527848478..ff315abc0efb 100644 --- a/stand/i386/zfsboot/Makefile +++ b/stand/i386/zfsboot/Makefile @@ -31,7 +31,9 @@ CFLAGS+=-DBOOTPROG=\"zfsboot\" \ -I${ZFSSRC} \ -I${SYSDIR}/crypto/skein \ -I${SYSDIR}/cddl/boot/zfs \ - -I${SYSDIR}/cddl/contrib/opensolaris/uts/common \ + -I${SYSDIR}/contrib/openzfs/include \ + -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \ + -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \ -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ -I${BOOTSRC}/i386/boot2 \ -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/zfsboot/zfsboot.c index 81a6d74abc7b..9e66e89f4d43 100644 --- a/stand/i386/zfsboot/zfsboot.c +++ b/stand/i386/zfsboot/zfsboot.c @@ -26,6 +26,7 @@ __FBSDID("$FreeBSD$"); #endif #include #include +#include #include #include @@ -218,16 +219,26 @@ main(void) if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) { /* set up proper device name string for ZFS */ strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); - if (zfs_nextboot(bdev, cmd, sizeof(cmd)) == 0) { + if (zfs_get_bootonce(bdev, OS_BOOTONCE, cmd, + sizeof(cmd)) == 0) { + nvlist_t *benv; + nextboot = 1; memcpy(cmddup, cmd, sizeof(cmd)); if (parse_cmd()) { if (!OPT_CHECK(RBX_QUIET)) - printf("failed to parse pad2 area\n"); + printf("failed to parse bootonce " + "command\n"); exit(0); } if (!OPT_CHECK(RBX_QUIET)) - printf("zfs nextboot: %s\n", cmddup); + 
printf("zfs bootonce: %s\n", cmddup); + + if (zfs_get_bootenv(bdev, &benv) == 0) { + nvlist_add_string(benv, OS_BOOTONCE_USED, + cmddup); + zfs_set_bootenv(bdev, benv); + } /* Do not process this command twice */ *cmd = 0; } diff --git a/stand/libsa/zfs/Makefile.inc b/stand/libsa/zfs/Makefile.inc index 4212e2b40b93..0b77f9a4cdb5 100644 --- a/stand/libsa/zfs/Makefile.inc +++ b/stand/libsa/zfs/Makefile.inc @@ -12,6 +12,8 @@ CFLAGS+= -I${SYSDIR}/crypto/skein # Do not unroll skein loops, reduce code size CFLAGS.skein_block.c+= -DSKEIN_LOOP=111 -CFLAGS.zfs.c+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 +CFLAGS+= -I${SYSDIR}/contrib/openzfs/include +CFLAGS+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs +CFLAGS.zfs.c+= -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 CFLAGS+= -Wformat -Wall diff --git a/stand/libsa/zfs/libzfs.h b/stand/libsa/zfs/libzfs.h index e9eb717f0cb0..27fde493670c 100644 --- a/stand/libsa/zfs/libzfs.h +++ b/stand/libsa/zfs/libzfs.h @@ -26,15 +26,15 @@ * $FreeBSD$ */ +#ifndef _BOOT_LIBZFS_H_ +#define _BOOT_LIBZFS_H_ + #include #ifdef LOADER_GELI_SUPPORT #include #endif -#ifndef _BOOT_LIBZFS_H_ -#define _BOOT_LIBZFS_H_ - #define ZFS_MAXNAMELEN 256 /* @@ -54,6 +54,7 @@ struct zfs_devdesc { #define NV_UNIQUE_NAME_TYPE 0x2 #define NV_ALIGN4(x) (((x) + 3) & ~3) +#define NV_ALIGN(x) (((x) + 7) & ~7) /* * nvlist header. 
@@ -109,25 +110,62 @@ typedef struct { nvlist_t *nvlist_create(int); void nvlist_destroy(nvlist_t *); -nvlist_t *nvlist_import(const uint8_t *, char, char); +nvlist_t *nvlist_import(const char *, size_t); +int nvlist_export(nvlist_t *); int nvlist_remove(nvlist_t *, const char *, data_type_t); -void nvlist_print(nvlist_t *, unsigned int); +int nvpair_type_from_name(const char *); +nvp_header_t *nvpair_find(nvlist_t *, const char *); +void nvpair_print(nvp_header_t *, unsigned int); +void nvlist_print(const nvlist_t *, unsigned int); +char *nvstring_get(nv_string_t *); int nvlist_find(const nvlist_t *, const char *, data_type_t, int *, void *, int *); -int nvlist_next(nvlist_t *); +nvp_header_t *nvlist_next_nvpair(nvlist_t *, nvp_header_t *); + +int nvlist_add_boolean_value(nvlist_t *, const char *, boolean_t); +int nvlist_add_byte(nvlist_t *, const char *, uint8_t); +int nvlist_add_int8(nvlist_t *, const char *, int8_t); +int nvlist_add_uint8(nvlist_t *, const char *, uint8_t); +int nvlist_add_int16(nvlist_t *, const char *, int16_t); +int nvlist_add_uint16(nvlist_t *, const char *, uint16_t); +int nvlist_add_int32(nvlist_t *, const char *, int32_t); +int nvlist_add_uint32(nvlist_t *, const char *, uint32_t); +int nvlist_add_int64(nvlist_t *, const char *, int64_t); +int nvlist_add_uint64(nvlist_t *, const char *, uint64_t); +int nvlist_add_string(nvlist_t *, const char *, const char *); +int nvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint32_t); +int nvlist_add_byte_array(nvlist_t *, const char *, uint8_t *, uint32_t); +int nvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint32_t); +int nvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint32_t); +int nvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint32_t); +int nvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint32_t); +int nvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint32_t); +int nvlist_add_uint32_array(nvlist_t *, const char 
*, uint32_t *, uint32_t); +int nvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint32_t); +int nvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint32_t); +int nvlist_add_string_array(nvlist_t *, const char *, char * const *, uint32_t); +int nvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *); +int nvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint32_t); int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); -int zfs_nextboot(void *vdev, char *buf, size_t size); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); +int zfs_get_bootonce(void *, const char *, char *, size_t); +int zfs_get_bootenv(void *, nvlist_t **); +int zfs_set_bootenv(void *, nvlist_t *); +int zfs_attach_nvstore(void *); uint64_t ldi_get_size(void *); void init_zfs_boot_options(const char *currdev); + int zfs_bootenv(const char *name); +int zfs_attach_nvstore(void *); int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); +nvlist_t *vdev_read_bootenv(vdev_t *); + extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; diff --git a/stand/libsa/zfs/nvlist.c b/stand/libsa/zfs/nvlist.c index 910d25499401..6cd2b40e5ada 100644 --- a/stand/libsa/zfs/nvlist.c +++ b/stand/libsa/zfs/nvlist.c @@ -27,129 +27,279 @@ __FBSDID("$FreeBSD$"); #include +#include #include +#include +#include #include #include "libzfs.h" +enum xdr_op { + XDR_OP_ENCODE = 1, + XDR_OP_DECODE = 2 +}; + typedef struct xdr { - int (*xdr_getint)(const struct xdr *, const void *, int *); + enum xdr_op xdr_op; + int (*xdr_getint)(struct xdr *, int *); + int (*xdr_putint)(struct xdr *, int); + int (*xdr_getuint)(struct xdr *, unsigned *); + int (*xdr_putuint)(struct xdr *, unsigned); + const uint8_t *xdr_buf; + uint8_t *xdr_idx; + size_t xdr_buf_size; } xdr_t; -static int xdr_int(const xdr_t *, const void *, int *); -static int mem_int(const xdr_t *, const void *, int *); 
-static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *); -static int nvlist_size(const xdr_t *, const uint8_t *); +static int nvlist_xdr_nvlist(xdr_t *, nvlist_t *); +static bool nvlist_size_xdr(xdr_t *, size_t *); +static bool nvlist_size_native(xdr_t *, size_t *); +static bool xdr_int(xdr_t *, int *); +static bool xdr_u_int(xdr_t *, unsigned *); -/* - * transform data from network to host. - */ -xdr_t ntoh = { - .xdr_getint = xdr_int -}; - -/* - * transform data from host to host. - */ -xdr_t native = { - .xdr_getint = mem_int -}; - -/* - * transform data from host to network. - */ -xdr_t hton = { - .xdr_getint = xdr_int -}; +typedef int (*xdrproc_t)(xdr_t *, void *); +/* Basic primitives for XDR translation operations, getint and putint. */ static int -xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip) +_getint(struct xdr *xdr, int *ip) { - int i, rv; - - rv = xdr->xdr_getint(xdr, buf, &i); - *ip = i; - return (rv); + *ip = be32dec(xdr->xdr_idx); + return (sizeof (int)); } static int -xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip) +_putint(struct xdr *xdr, int i) +{ + int *ip = (int *)xdr->xdr_idx; + + *ip = htobe32(i); + return (sizeof (int)); +} + +static int +_getuint(struct xdr *xdr, unsigned *ip) +{ + *ip = be32dec(xdr->xdr_idx); + return (sizeof (unsigned)); +} + +static int +_putuint(struct xdr *xdr, unsigned i) +{ + unsigned *up = (unsigned *)xdr->xdr_idx; + + *up = htobe32(i); + return (sizeof (int)); +} + +/* + * XDR data translations. 
+ */ +static bool +xdr_short(xdr_t *xdr, short *ip) +{ + int i; + bool rv; + + i = *ip; + if ((rv = xdr_int(xdr, &i))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *ip = i; + } + return (rv); +} + +static bool +xdr_u_short(xdr_t *xdr, unsigned short *ip) { unsigned u; - int rv; + bool rv; - rv = xdr->xdr_getint(xdr, buf, &u); - *ip = u; - return (rv); -} - -static int -xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip) -{ - *ip = be32dec(buf); - return (sizeof(int)); -} - -static int -xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip) -{ - *ip = be32dec(buf); - return (sizeof(unsigned)); -} - -static int -xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s) -{ - int size; - - size = xdr->xdr_getint(xdr, buf, &s->nv_size); - size = NV_ALIGN4(size + s->nv_size); - return (size); -} - -static int -xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp) -{ - int hi, rv; - unsigned lo; - - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; - return (rv); -} - -static int -xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp) -{ - unsigned hi, lo; - int rv; - - rv = xdr->xdr_getint(xdr, buf, &hi); - rv += xdr->xdr_getint(xdr, buf + rv, &lo); - *lp = (((int64_t)hi) << 32) | lo; - return (rv); -} - -static int -xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp) -{ - int i, rv; - - rv = xdr->xdr_getint(xdr, buf, &i); - *cp = i; + u = *ip; + if ((rv = xdr_u_int(xdr, &u))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *ip = u; + } return (rv); } /* - * read native data. + * translate xdr->xdr_idx, increment it by size of int. 
*/ -static int -mem_int(const xdr_t *xdr, const void *buf, int *i) +static bool +xdr_int(xdr_t *xdr, int *ip) { - *i = *(int *)buf; - return (sizeof(int)); + bool rv = false; + int *i = (int *)xdr->xdr_idx; + + if (xdr->xdr_idx + sizeof (int) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + xdr->xdr_idx += xdr->xdr_putint(xdr, *ip); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getint(xdr, i); + *ip = *i; + rv = true; + break; + } + return (rv); } +/* + * translate xdr->xdr_idx, increment it by size of unsigned int. + */ +static bool +xdr_u_int(xdr_t *xdr, unsigned *ip) +{ + bool rv = false; + unsigned *u = (unsigned *)xdr->xdr_idx; + + if (xdr->xdr_idx + sizeof (unsigned) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + xdr->xdr_idx += xdr->xdr_putuint(xdr, *ip); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getuint(xdr, u); + *ip = *u; + rv = true; + break; + } + return (rv); +} + +static bool +xdr_int64(xdr_t *xdr, int64_t *lp) +{ + int hi; + unsigned lo; + bool rv = false; + + if (xdr->xdr_idx + sizeof (int64_t) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *lp, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + xdr->xdr_idx += xdr->xdr_putint(xdr, hi); + xdr->xdr_idx += xdr->xdr_putint(xdr, lo); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getint(xdr, &hi); + xdr->xdr_idx += xdr->xdr_getuint(xdr, &lo); + *lp = (((int64_t)hi) << 32) | lo; + rv = true; + } + return (rv); +} + +static bool +xdr_uint64(xdr_t *xdr, uint64_t *lp) +{ + unsigned hi, lo; + bool rv = false; + + if (xdr->xdr_idx + sizeof 
(uint64_t) > xdr->xdr_buf + xdr->xdr_buf_size) + return (rv); + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + /* Encode value *ip, store to buf */ + hi = *lp >> 32; + lo = *lp & UINT32_MAX; + xdr->xdr_idx += xdr->xdr_putint(xdr, hi); + xdr->xdr_idx += xdr->xdr_putint(xdr, lo); + rv = true; + break; + + case XDR_OP_DECODE: + /* Decode buf, return value to *ip */ + xdr->xdr_idx += xdr->xdr_getuint(xdr, &hi); + xdr->xdr_idx += xdr->xdr_getuint(xdr, &lo); + *lp = (((uint64_t)hi) << 32) | lo; + rv = true; + } + return (rv); +} + +static bool +xdr_char(xdr_t *xdr, char *cp) +{ + int i; + bool rv = false; + + i = *cp; + if ((rv = xdr_int(xdr, &i))) { + if (xdr->xdr_op == XDR_OP_DECODE) + *cp = i; + } + return (rv); +} + +static bool +xdr_string(xdr_t *xdr, nv_string_t *s) +{ + int size = 0; + bool rv = false; + + switch (xdr->xdr_op) { + case XDR_OP_ENCODE: + size = s->nv_size; + if (xdr->xdr_idx + sizeof (unsigned) + NV_ALIGN4(size) > + xdr->xdr_buf + xdr->xdr_buf_size) + break; + xdr->xdr_idx += xdr->xdr_putuint(xdr, s->nv_size); + xdr->xdr_idx += NV_ALIGN4(size); + rv = true; + break; + + case XDR_OP_DECODE: + if (xdr->xdr_idx + sizeof (unsigned) > + xdr->xdr_buf + xdr->xdr_buf_size) + break; + size = xdr->xdr_getuint(xdr, &s->nv_size); + size = NV_ALIGN4(size + s->nv_size); + if (xdr->xdr_idx + size > xdr->xdr_buf + xdr->xdr_buf_size) + break; + xdr->xdr_idx += size; + rv = true; + break; + } + return (rv); +} + +static bool +xdr_array(xdr_t *xdr, const unsigned nelem, const xdrproc_t elproc) +{ + bool rv = true; + + for (unsigned i = 0; i < nelem; i++) { + if (!elproc(xdr, xdr->xdr_idx)) + return (false); + } + return (rv); +} + +/* + * nvlist management functions. 
+ */ void nvlist_destroy(nvlist_t *nvl) { @@ -184,14 +334,14 @@ nvlist_create(int flag) nvlist_t *nvl; nvs_data_t *nvs; - nvl = calloc(1, sizeof(*nvl)); + nvl = calloc(1, sizeof (*nvl)); if (nvl == NULL) return (nvl); nvl->nv_header.nvh_encoding = NV_ENCODE_XDR; nvl->nv_header.nvh_endian = _BYTE_ORDER == _LITTLE_ENDIAN; - nvl->nv_asize = nvl->nv_size = sizeof(*nvs); + nvl->nv_asize = nvl->nv_size = sizeof (*nvs); nvs = calloc(1, nvl->nv_asize); if (nvs == NULL) { free(nvl); @@ -205,32 +355,59 @@ nvlist_create(int flag) return (nvl); } -static void -nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) +static bool +nvlist_xdr_nvp(xdr_t *xdr, nvlist_t *nvl) { nv_string_t *nv_string; nv_pair_data_t *nvp_data; nvlist_t nvlist; + unsigned type, nelem; + xdr_t nv_xdr; - nv_string = (nv_string_t *)nvl->nv_idx; - nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string); - nvp_data = (nv_pair_data_t *)nvl->nv_idx; + nv_string = (nv_string_t *)xdr->xdr_idx; + if (!xdr_string(xdr, nv_string)) { + return (false); + } + nvp_data = (nv_pair_data_t *)xdr->xdr_idx; - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type); - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem); + type = nvp_data->nv_type; + nelem = nvp_data->nv_nelem; + if (!xdr_u_int(xdr, &type) || !xdr_u_int(xdr, &nelem)) + return (false); - switch (nvp_data->nv_type) { + switch (type) { case DATA_TYPE_NVLIST: case DATA_TYPE_NVLIST_ARRAY: bzero(&nvlist, sizeof (nvlist)); - nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist.nv_data = xdr->xdr_idx; nvlist.nv_idx = nvlist.nv_data; - for (int i = 0; i < nvp_data->nv_nelem; i++) { - nvlist.nv_asize = - nvlist_size(xdr, nvlist.nv_data); - nvlist_decode_nvlist(xdr, &nvlist); - nvl->nv_idx = nvlist.nv_idx; - nvlist.nv_data = nvlist.nv_idx; + + /* Set up xdr for this nvlist. 
*/ + nv_xdr = *xdr; + nv_xdr.xdr_buf = nvlist.nv_data; + nv_xdr.xdr_idx = nvlist.nv_data; + nv_xdr.xdr_buf_size = + nvl->nv_data + nvl->nv_size - nvlist.nv_data; + + for (unsigned i = 0; i < nelem; i++) { + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!nvlist_size_native(&nv_xdr, + &nvlist.nv_size)) + return (false); + } else { + if (!nvlist_size_xdr(&nv_xdr, + &nvlist.nv_size)) + return (false); + } + if (nvlist_xdr_nvlist(xdr, &nvlist) != 0) + return (false); + + nvlist.nv_data = nv_xdr.xdr_idx; + nvlist.nv_idx = nv_xdr.xdr_idx; + + nv_xdr.xdr_buf = nv_xdr.xdr_idx; + nv_xdr.xdr_buf_size = + nvl->nv_data + nvl->nv_size - nvlist.nv_data; } break; @@ -240,98 +417,275 @@ nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph) case DATA_TYPE_BYTE: case DATA_TYPE_INT8: case DATA_TYPE_UINT8: - nvl->nv_idx += xdr_char(xdr, &nvp_data->nv_data[0], - (char *)&nvp_data->nv_data[0]); + if (!xdr_char(xdr, (char *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_INT16: - nvl->nv_idx += xdr_short(xdr, &nvp_data->nv_data[0], - (short *)&nvp_data->nv_data[0]); + if (!xdr_short(xdr, (short *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_UINT16: - nvl->nv_idx += xdr_u_short(xdr, &nvp_data->nv_data[0], - (unsigned short *)&nvp_data->nv_data[0]); + if (!xdr_u_short(xdr, (unsigned short *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_BOOLEAN_VALUE: case DATA_TYPE_INT32: - nvl->nv_idx += xdr_int(xdr, &nvp_data->nv_data[0], - (int *)&nvp_data->nv_data[0]); + if (!xdr_int(xdr, (int *)&nvp_data->nv_data[0])) + return (false); break; case DATA_TYPE_UINT32: - nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_data[0], - (unsigned *)&nvp_data->nv_data[0]); + if (!xdr_u_int(xdr, (unsigned *)&nvp_data->nv_data[0])) + return (false); break; + case DATA_TYPE_HRTIME: case DATA_TYPE_INT64: - nvl->nv_idx += xdr_int64(xdr, &nvp_data->nv_data[0], - (int64_t *)&nvp_data->nv_data[0]); + if (!xdr_int64(xdr, (int64_t *)&nvp_data->nv_data[0])) + 
return (false); break; case DATA_TYPE_UINT64: - nvl->nv_idx += xdr_uint64(xdr, &nvp_data->nv_data[0], - (uint64_t *)&nvp_data->nv_data[0]); + if (!xdr_uint64(xdr, (uint64_t *)&nvp_data->nv_data[0])) + return (false); break; + case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_STRING: nv_string = (nv_string_t *)&nvp_data->nv_data[0]; - nvl->nv_idx += xdr_string(xdr, &nvp_data->nv_data[0], - nv_string); + if (!xdr_string(xdr, nv_string)) + return (false); + break; + case DATA_TYPE_STRING_ARRAY: + nv_string = (nv_string_t *)&nvp_data->nv_data[0]; + for (unsigned i = 0; i < nelem; i++) { + if (!xdr_string(xdr, nv_string)) + return (false); + nv_string = (nv_string_t *)xdr->xdr_idx; + } + break; + + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_u_int)) + return (false); + break; + + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + if (!xdr_array(xdr, nelem, (xdrproc_t)xdr_uint64)) + return (false); break; } -} - -static void -nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl) -{ - nvp_header_t *nvph; - nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data; - - nvl->nv_idx = nvl->nv_data; - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version, - &nvs->nvl_version); - nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag, - &nvs->nvl_nvflag); - - nvph = &nvs->nvl_pair; - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->encoded_size, &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, - (const uint8_t *)&nvph->decoded_size, &nvph->decoded_size); - - while (nvph->encoded_size && nvph->decoded_size) { - nvlist_nvp_decode(xdr, nvl, nvph); - - nvph = (nvp_header_t *)(nvl->nv_idx); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size, - &nvph->encoded_size); - nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size, 
- &nvph->decoded_size); - } + return (true); } static int -nvlist_size(const xdr_t *xdr, const uint8_t *stream) +nvlist_xdr_nvlist(xdr_t *xdr, nvlist_t *nvl) { - const uint8_t *p, *pair; + nvp_header_t *nvph; + nvs_data_t *nvs; + unsigned encoded_size, decoded_size; + int rv; + + nvs = (nvs_data_t *)xdr->xdr_idx; + nvph = &nvs->nvl_pair; + + if (!xdr_u_int(xdr, &nvs->nvl_version)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvs->nvl_nvflag)) + return (EINVAL); + + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!xdr_u_int(xdr, &nvph->encoded_size)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvph->decoded_size)) + return (EINVAL); + } else { + xdr->xdr_idx += 2 * sizeof (unsigned); + } + + rv = 0; + while (encoded_size && decoded_size) { + if (!nvlist_xdr_nvp(xdr, nvl)) + return (EINVAL); + + nvph = (nvp_header_t *)(xdr->xdr_idx); + encoded_size = nvph->encoded_size; + decoded_size = nvph->decoded_size; + if (xdr->xdr_op == XDR_OP_ENCODE) { + if (!xdr_u_int(xdr, &nvph->encoded_size)) + return (EINVAL); + if (!xdr_u_int(xdr, &nvph->decoded_size)) + return (EINVAL); + } else { + xdr->xdr_idx += 2 * sizeof (unsigned); + } + } + return (rv); +} + +/* + * Calculate nvlist size, translating encoded_size and decoded_size. 
+ */ +static bool +nvlist_size_xdr(xdr_t *xdr, size_t *size) +{ + uint8_t *pair; unsigned encoded_size, decoded_size; - p = stream; - p += 2 * sizeof(unsigned); + xdr->xdr_idx += 2 * sizeof (unsigned); + + pair = xdr->xdr_idx; + if (!xdr_u_int(xdr, &encoded_size) || !xdr_u_int(xdr, &decoded_size)) + return (false); - pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); while (encoded_size && decoded_size) { - p = pair + encoded_size; - pair = p; - p += xdr->xdr_getint(xdr, p, &encoded_size); - p += xdr->xdr_getint(xdr, p, &decoded_size); + xdr->xdr_idx = pair + encoded_size; + pair = xdr->xdr_idx; + if (!xdr_u_int(xdr, &encoded_size) || + !xdr_u_int(xdr, &decoded_size)) + return (false); } - return (p - stream); + *size = xdr->xdr_idx - xdr->xdr_buf; + + return (true); +} + +nvp_header_t * +nvlist_next_nvpair(nvlist_t *nvl, nvp_header_t *nvh) +{ + uint8_t *pair; + unsigned encoded_size, decoded_size; + xdr_t xdr; + + if (nvl == NULL) + return (NULL); + + xdr.xdr_buf = nvl->nv_data; + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_size; + + xdr.xdr_idx += 2 * sizeof (unsigned); + + /* Skip to the current pair */ + if (nvh != NULL) { + xdr.xdr_idx = (uint8_t *)nvh; + } + + pair = xdr.xdr_idx; + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + encoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + decoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + while (encoded_size && decoded_size) { + if (nvh == NULL) + return ((nvp_header_t *)pair); + + xdr.xdr_idx = pair + encoded_size; + nvh = (nvp_header_t *)xdr.xdr_idx; + + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + encoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf +
xdr.xdr_buf_size) + return (NULL); + decoded_size = *(unsigned *)xdr.xdr_idx; + xdr.xdr_idx += sizeof (unsigned); + if (xdr.xdr_idx > xdr.xdr_buf + xdr.xdr_buf_size) + return (NULL); + + if (encoded_size != 0 && decoded_size != 0) { + return (nvh); + } + } + return (NULL); +} + +/* + * Calculate nvlist size by walking in memory data. + */ +static bool +nvlist_size_native(xdr_t *xdr, size_t *size) +{ + uint8_t *pair; + unsigned encoded_size, decoded_size; + + xdr->xdr_idx += 2 * sizeof (unsigned); + + pair = xdr->xdr_idx; + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + + encoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + decoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + while (encoded_size && decoded_size) { + xdr->xdr_idx = pair + encoded_size; + pair = xdr->xdr_idx; + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + encoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + if (xdr->xdr_idx > xdr->xdr_buf + xdr->xdr_buf_size) + return (false); + decoded_size = *(unsigned *)xdr->xdr_idx; + xdr->xdr_idx += sizeof (unsigned); + } + *size = xdr->xdr_idx - xdr->xdr_buf; + + return (true); +} + +/* + * Export nvlist to byte stream format. + */ +int +nvlist_export(nvlist_t *nvl) +{ + int rv; + xdr_t xdr = { + .xdr_op = XDR_OP_ENCODE, + .xdr_putint = _putint, + .xdr_putuint = _putuint, + .xdr_buf = nvl->nv_data, + .xdr_idx = nvl->nv_data, + .xdr_buf_size = nvl->nv_size + }; + + if (nvl->nv_header.nvh_encoding != NV_ENCODE_XDR) + return (ENOTSUP); + + nvl->nv_idx = nvl->nv_data; + rv = nvlist_xdr_nvlist(&xdr, nvl); + + return (rv); } /* @@ -340,28 +694,57 @@ nvlist_size(const xdr_t *xdr, const uint8_t *stream) * Then translate the data. 
*/ nvlist_t * -nvlist_import(const uint8_t *stream, char encoding, char endian) +nvlist_import(const char *stream, size_t size) { nvlist_t *nvl; + xdr_t xdr = { + .xdr_op = XDR_OP_DECODE, + .xdr_getint = _getint, + .xdr_getuint = _getuint + }; - if (encoding != NV_ENCODE_XDR) + /* Check the 12-byte nvlist head: header, version, nvflag. */ + if (size < 12 || stream[0] != NV_ENCODE_XDR || + (stream[1] != '\0' && stream[1] != '\1') || + stream[2] != '\0' || stream[3] != '\0' || + be32toh(*(uint32_t *)(stream + 4)) != NV_VERSION || + be32toh(*(uint32_t *)(stream + 8)) != NV_UNIQUE_NAME) return (NULL); - nvl = malloc(sizeof(*nvl)); + nvl = malloc(sizeof (*nvl)); if (nvl == NULL) return (nvl); - nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream); + nvl->nv_header.nvh_encoding = stream[0]; + nvl->nv_header.nvh_endian = stream[1]; + nvl->nv_header.nvh_reserved1 = stream[2]; + nvl->nv_header.nvh_reserved2 = stream[3]; + + xdr.xdr_buf = xdr.xdr_idx = (uint8_t *)stream + 4; + xdr.xdr_buf_size = size - 4; + + if (!nvlist_size_xdr(&xdr, &nvl->nv_asize)) { + free(nvl); + return (NULL); + } + nvl->nv_size = nvl->nv_asize; nvl->nv_data = malloc(nvl->nv_asize); if (nvl->nv_data == NULL) { free(nvl); return (NULL); } nvl->nv_idx = nvl->nv_data; - bcopy(stream, nvl->nv_data, nvl->nv_asize); + bcopy(stream + 4, nvl->nv_data, nvl->nv_asize); + + xdr.xdr_buf = xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_asize; + + if (nvlist_xdr_nvlist(&xdr, nvl) != 0) { + free(nvl->nv_data); + free(nvl); + nvl = NULL; + } - nvlist_decode_nvlist(&ntoh, nvl); - nvl->nv_idx = nvl->nv_data; return (nvl); } @@ -377,30 +760,37 @@ nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) nv_string_t *nvp_name; nv_pair_data_t *nvp_data; size_t size; + xdr_t xdr; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); - head = nvl->nv_data; - data = (nvs_data_t *)head; + /* Make sure the nvlist size is set correctly */ + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = nvl->nv_size; +
if (!nvlist_size_native(&xdr, &nvl->nv_size)) + return (EINVAL); + + data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ head = (uint8_t *)nvp; while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)(head + sizeof(*nvp)); + nvp_name = (nv_string_t *)(nvp + 1); - nvp_data = (nv_pair_data_t *) - NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + - nvp_name->nv_size); + nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + + NV_ALIGN4(nvp_name->nv_size)); - if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && - nvp_data->nv_type == type) { + if (strlen(name) == nvp_name->nv_size && + memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { /* * set tail to point to next nvpair and size * is the length of the tail. */ tail = head + nvp->encoded_size; - size = nvl->nv_data + nvl->nv_size - tail; + size = nvl->nv_size - (tail - nvl->nv_data); /* adjust the size of the nvlist. */ nvl->nv_size -= nvp->encoded_size; @@ -414,6 +804,51 @@ nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type) return (ENOENT); } +static int +clone_nvlist(const nvlist_t *nvl, const uint8_t *ptr, unsigned size, + nvlist_t **nvlist) +{ + nvlist_t *nv; + + nv = calloc(1, sizeof (*nv)); + if (nv == NULL) + return (ENOMEM); + + nv->nv_header = nvl->nv_header; + nv->nv_asize = size; + nv->nv_size = size; + nv->nv_data = malloc(nv->nv_asize); + if (nv->nv_data == NULL) { + free(nv); + return (ENOMEM); + } + + bcopy(ptr, nv->nv_data, nv->nv_asize); + *nvlist = nv; + return (0); +} + +/* + * Return the next nvlist in an nvlist array. 
+ */ +static uint8_t * +nvlist_next(const uint8_t *ptr) +{ + nvs_data_t *data; + nvp_header_t *nvp; + + data = (nvs_data_t *)ptr; + nvp = &data->nvl_pair; /* first pair in nvlist */ + + while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { + nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + } + return ((uint8_t *)nvp + sizeof (*nvp)); +} + +/* + * Note: nvlist and nvlist array must be freed by caller. + */ int nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, int *elementsp, void *valuep, int *sizep) @@ -422,7 +857,9 @@ nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; - nvlist_t *nvlist; + nvlist_t **nvlist, *nv; + uint8_t *ptr; + int rv; if (nvl == NULL || nvl->nv_data == NULL || name == NULL) return (EINVAL); @@ -431,20 +868,24 @@ nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type, nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp)); + nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof (*nvp)); + if (nvl->nv_data + nvl->nv_size < + nvp_name->nv_data + nvp_name->nv_size) + return (EIO); nvp_data = (nv_pair_data_t *) NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); - if (memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && - nvp_data->nv_type == type) { + if (strlen(name) == nvp_name->nv_size && + memcmp(nvp_name->nv_data, name, nvp_name->nv_size) == 0 && + (nvp_data->nv_type == type || type == DATA_TYPE_UNKNOWN)) { if (elementsp != NULL) *elementsp = nvp_data->nv_nelem; switch (nvp_data->nv_type) { case DATA_TYPE_UINT64: - *(uint64_t *)valuep = - *(uint64_t *)nvp_data->nv_data; + bcopy(nvp_data->nv_data, valuep, + sizeof (uint64_t)); return (0); case DATA_TYPE_STRING: nvp_name = (nv_string_t *)nvp_data->nv_data; @@ -455,146 +896,673 @@ nvlist_find(const nvlist_t *nvl, const char *name, 
data_type_t type, &nvp_name->nv_data[0]; return (0); case DATA_TYPE_NVLIST: - case DATA_TYPE_NVLIST_ARRAY: - nvlist = malloc(sizeof(*nvlist)); - if (nvlist != NULL) { - nvlist->nv_header = nvl->nv_header; - nvlist->nv_asize = 0; - nvlist->nv_size = 0; - nvlist->nv_idx = NULL; - nvlist->nv_data = &nvp_data->nv_data[0]; - *(nvlist_t **)valuep = nvlist; - return (0); + ptr = &nvp_data->nv_data[0]; + rv = clone_nvlist(nvl, ptr, + nvlist_next(ptr) - ptr, &nv); + if (rv == 0) { + *(nvlist_t **)valuep = nv; } - return (ENOMEM); + return (rv); + + case DATA_TYPE_NVLIST_ARRAY: + nvlist = calloc(nvp_data->nv_nelem, + sizeof (nvlist_t *)); + if (nvlist == NULL) + return (ENOMEM); + ptr = &nvp_data->nv_data[0]; + rv = 0; + for (unsigned i = 0; i < nvp_data->nv_nelem; + i++) { + rv = clone_nvlist(nvl, ptr, + nvlist_next(ptr) - ptr, &nvlist[i]); + if (rv != 0) + goto error; + ptr = nvlist_next(ptr); + } + *(nvlist_t ***)valuep = nvlist; + return (rv); } return (EIO); } /* Not our pair, skip to next. */ nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + if (nvl->nv_data + nvl->nv_size < (uint8_t *)nvp) + return (EIO); } return (ENOENT); +error: + for (unsigned i = 0; i < nvp_data->nv_nelem && nvlist[i] != NULL; i++) { + free(nvlist[i]->nv_data); + free(nvlist[i]); + } + free(nvlist); + return (rv); } -/* - * Return the next nvlist in an nvlist array.
- */ -int -nvlist_next(nvlist_t *nvl) +static int +get_value_size(data_type_t type, const void *data, uint32_t nelem) { - nvs_data_t *data; - nvp_header_t *nvp; + uint64_t value_sz = 0; - if (nvl == NULL || nvl->nv_data == NULL || nvl->nv_asize != 0) + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + /* Our smallest data unit is 32-bit */ + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_HRTIME: + case DATA_TYPE_INT64: + value_sz = sizeof (int64_t); + break; + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; + case DATA_TYPE_STRING: + if (data == NULL) + value_sz = 0; + else + value_sz = strlen(data) + 1; + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = nelem * sizeof (uint8_t); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (int64_t); + break; + case DATA_TYPE_UINT64_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t); + + if (data != NULL) { + char *const *strs = data; + uint32_t i; + + for (i = 0; i < nelem; i++) { + if (strs[i] == NULL) + return (-1); + value_sz += strlen(strs[i]) + 1; + } + } + break; + case DATA_TYPE_NVLIST: + /* + * The decoded size of nvlist is constant. 
+ */ + value_sz = NV_ALIGN(6 * 4); /* sizeof nvlist_t */ + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = (uint64_t)nelem * sizeof (uint64_t) + + (uint64_t)nelem * NV_ALIGN(6 * 4); /* sizeof nvlist_t */ + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +static int +get_nvp_data_size(data_type_t type, const void *data, uint32_t nelem) +{ + uint64_t value_sz = 0; + xdr_t xdr; + size_t size; + + switch (type) { + case DATA_TYPE_BOOLEAN: + value_sz = 0; + break; + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + /* Our smallest data unit is 32-bit */ + value_sz = sizeof (uint32_t); + break; + case DATA_TYPE_HRTIME: + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + value_sz = sizeof (uint64_t); + break; + case DATA_TYPE_STRING: + value_sz = 4 + NV_ALIGN4(strlen(data)); + break; + case DATA_TYPE_BYTE_ARRAY: + value_sz = NV_ALIGN4(nelem); + break; + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + value_sz = 4 + (uint64_t)nelem * sizeof (uint32_t); + break; + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + value_sz = 4 + (uint64_t)nelem * sizeof (uint64_t); + break; + case DATA_TYPE_STRING_ARRAY: + if (data != NULL) { + char *const *strs = data; + uint32_t i; + + for (i = 0; i < nelem; i++) { + value_sz += 4 + NV_ALIGN4(strlen(strs[i])); + } + } + break; + case DATA_TYPE_NVLIST: + xdr.xdr_idx = ((nvlist_t *)data)->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t *)data)->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (-1); + + value_sz = size; + break; + case DATA_TYPE_NVLIST_ARRAY: + value_sz = 0; + for (uint32_t i = 0; i < nelem; i++) { + xdr.xdr_idx = 
((nvlist_t **)data)[i]->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (-1); + value_sz += size; + } + break; + default: + return (-1); + } + + return (value_sz > INT32_MAX ? -1 : (int)value_sz); +} + +#define NVPE_SIZE(name_len, data_len) \ + (4 + 4 + 4 + NV_ALIGN4(name_len) + 4 + 4 + data_len) +#define NVP_SIZE(name_len, data_len) \ + (NV_ALIGN((4 * 4) + (name_len)) + NV_ALIGN(data_len)) + +static int +nvlist_add_common(nvlist_t *nvl, const char *name, data_type_t type, + uint32_t nelem, const void *data) +{ + nvs_data_t *nvs; + nvp_header_t head, *hp; + uint8_t *ptr; + size_t namelen; + int decoded_size, encoded_size; + xdr_t xdr; + + nvs = (nvs_data_t *)nvl->nv_data; + if (nvs->nvl_nvflag & NV_UNIQUE_NAME) + (void) nvlist_remove(nvl, name, type); + + xdr.xdr_buf = nvl->nv_data; + xdr.xdr_idx = nvl->nv_data; + xdr.xdr_buf_size = nvl->nv_size; + if (!nvlist_size_native(&xdr, &nvl->nv_size)) return (EINVAL); - data = (nvs_data_t *)nvl->nv_data; - nvp = &data->nvl_pair; /* first pair in nvlist */ + namelen = strlen(name); + if ((decoded_size = get_value_size(type, data, nelem)) < 0) + return (EINVAL); + if ((encoded_size = get_nvp_data_size(type, data, nelem)) < 0) + return (EINVAL); - while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); + /* + * The encoded size is calculated as: + * encode_size (4) + decode_size (4) + + * name string size (4 + NV_ALIGN4(namelen) + + * data type (4) + nelem size (4) + datalen + * + * The decoded size is calculated as: + * Note: namelen is with terminating 0. 
+ * NV_ALIGN(sizeof (nvpair_t) (4 * 4) + namelen + 1) + + * NV_ALIGN(data_len) + */ + + head.encoded_size = NVPE_SIZE(namelen, encoded_size); + head.decoded_size = NVP_SIZE(namelen + 1, decoded_size); + + if (nvl->nv_asize - nvl->nv_size < head.encoded_size + 8) { + ptr = realloc(nvl->nv_data, nvl->nv_asize + head.encoded_size); + if (ptr == NULL) + return (ENOMEM); + nvl->nv_data = ptr; + nvl->nv_asize += head.encoded_size; } - nvl->nv_data = (uint8_t *)nvp + sizeof(*nvp); + nvl->nv_idx = nvl->nv_data + nvl->nv_size - sizeof (*hp); + bzero(nvl->nv_idx, head.encoded_size + 8); + hp = (nvp_header_t *)nvl->nv_idx; + *hp = head; + nvl->nv_idx += sizeof (*hp); + *(unsigned *)nvl->nv_idx = namelen; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, name, namelen + 1); + nvl->nv_idx += NV_ALIGN4(namelen); + *(unsigned *)nvl->nv_idx = type; + nvl->nv_idx += sizeof (unsigned); + *(unsigned *)nvl->nv_idx = nelem; + nvl->nv_idx += sizeof (unsigned); + + switch (type) { + case DATA_TYPE_BOOLEAN: + break; + case DATA_TYPE_BYTE_ARRAY: + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + bcopy(data, nvl->nv_idx, nelem); + nvl->nv_idx += encoded_size; + break; + case DATA_TYPE_STRING: + encoded_size = strlen(data); + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, data, encoded_size + 1); + nvl->nv_idx += NV_ALIGN4(encoded_size); + break; + case DATA_TYPE_STRING_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + encoded_size = strlen(((char **)data)[i]); + *(unsigned *)nvl->nv_idx = encoded_size; + nvl->nv_idx += sizeof (unsigned); + strlcpy((char *)nvl->nv_idx, ((char **)data)[i], + encoded_size + 1); + nvl->nv_idx += NV_ALIGN4(encoded_size); + } + break; + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + case DATA_TYPE_INT8_ARRAY: + case DATA_TYPE_UINT8_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + *(unsigned *)nvl->nv_idx = ((uint8_t *)data)[i]; + 
nvl->nv_idx += sizeof (unsigned); + } + break; + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + for (uint32_t i = 0; i < nelem; i++) { + *(unsigned *)nvl->nv_idx = ((uint16_t *)data)[i]; + nvl->nv_idx += sizeof (unsigned); + } + break; + case DATA_TYPE_NVLIST: + bcopy(((nvlist_t *)data)->nv_data, nvl->nv_idx, encoded_size); + break; + case DATA_TYPE_NVLIST_ARRAY: { + uint8_t *buf = nvl->nv_idx; + size_t size; + xdr_t xdr; + + for (uint32_t i = 0; i < nelem; i++) { + xdr.xdr_idx = ((nvlist_t **)data)[i]->nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = ((nvlist_t **)data)[i]->nv_size; + + if (!nvlist_size_native(&xdr, &size)) + return (EINVAL); + + bcopy(((nvlist_t **)data)[i]->nv_data, buf, size); + buf += size; + } + break; + } + default: + bcopy(data, nvl->nv_idx, encoded_size); + } + + nvl->nv_size += head.encoded_size; + return (0); } -void -nvlist_print(nvlist_t *nvl, unsigned int indent) +int +nvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_VALUE, 1, + &value)); +} + +int +nvlist_add_byte(nvlist_t *nvl, const char *name, uint8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE, 1, &value)); +} + +int +nvlist_add_int8(nvlist_t *nvl, const char *name, int8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8, 1, &value)); +} + +int +nvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8, 1, &value)); +} + +int +nvlist_add_int16(nvlist_t *nvl, const char *name, int16_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16, 1, &value)); +} + +int +nvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16, 1, &value)); +} + +int +nvlist_add_int32(nvlist_t *nvl, const char *name, int32_t value) +{ + return (nvlist_add_common(nvl, name, 
DATA_TYPE_INT32, 1, &value)); +} + +int +nvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32, 1, &value)); +} + +int +nvlist_add_int64(nvlist_t *nvl, const char *name, int64_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64, 1, &value)); +} + +int +nvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64, 1, &value)); +} + +int +nvlist_add_string(nvlist_t *nvl, const char *name, const char *value) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING, 1, value)); +} + +int +nvlist_add_boolean_array(nvlist_t *nvl, const char *name, + boolean_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BOOLEAN_ARRAY, n, a)); +} + +int +nvlist_add_byte_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_BYTE_ARRAY, n, a)); +} + +int +nvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT8_ARRAY, n, a)); +} + +int +nvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT8_ARRAY, n, a)); +} + +int +nvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT16_ARRAY, n, a)); +} + +int +nvlist_add_uint16_array(nvlist_t *nvl, const char *name, uint16_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT16_ARRAY, n, a)); +} + +int +nvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT32_ARRAY, n, a)); +} + +int +nvlist_add_uint32_array(nvlist_t *nvl, const char *name, uint32_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT32_ARRAY, n, a)); +} + +int 
+nvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_INT64_ARRAY, n, a)); +} + +int +nvlist_add_uint64_array(nvlist_t *nvl, const char *name, uint64_t *a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_UINT64_ARRAY, n, a)); +} + +int +nvlist_add_string_array(nvlist_t *nvl, const char *name, + char * const *a, uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_STRING_ARRAY, n, a)); +} + +int +nvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST, 1, val)); +} + +int +nvlist_add_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **a, + uint32_t n) +{ + return (nvlist_add_common(nvl, name, DATA_TYPE_NVLIST_ARRAY, n, a)); +} + +static const char *typenames[] = { + "DATA_TYPE_UNKNOWN", + "DATA_TYPE_BOOLEAN", + "DATA_TYPE_BYTE", + "DATA_TYPE_INT16", + "DATA_TYPE_UINT16", + "DATA_TYPE_INT32", + "DATA_TYPE_UINT32", + "DATA_TYPE_INT64", + "DATA_TYPE_UINT64", + "DATA_TYPE_STRING", + "DATA_TYPE_BYTE_ARRAY", + "DATA_TYPE_INT16_ARRAY", + "DATA_TYPE_UINT16_ARRAY", + "DATA_TYPE_INT32_ARRAY", + "DATA_TYPE_UINT32_ARRAY", + "DATA_TYPE_INT64_ARRAY", + "DATA_TYPE_UINT64_ARRAY", + "DATA_TYPE_STRING_ARRAY", + "DATA_TYPE_HRTIME", + "DATA_TYPE_NVLIST", + "DATA_TYPE_NVLIST_ARRAY", + "DATA_TYPE_BOOLEAN_VALUE", + "DATA_TYPE_INT8", + "DATA_TYPE_UINT8", + "DATA_TYPE_BOOLEAN_ARRAY", + "DATA_TYPE_INT8_ARRAY", + "DATA_TYPE_UINT8_ARRAY" +}; + +int +nvpair_type_from_name(const char *name) +{ + unsigned i; + + for (i = 0; i < nitems(typenames); i++) { + if (strcmp(name, typenames[i]) == 0) + return (i); + } + return (0); +} + +nvp_header_t * +nvpair_find(nvlist_t *nv, const char *name) +{ + nvp_header_t *nvh; + + nvh = NULL; + while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) { + nv_string_t *nvp_name; + + nvp_name = (nv_string_t *)(nvh + 1); + if (nvp_name->nv_size == strlen(name) && + memcmp(nvp_name->nv_data, 
name, nvp_name->nv_size) == 0) + break; + } + return (nvh); +} + +void +nvpair_print(nvp_header_t *nvp, unsigned int indent) { - static const char *typenames[] = { - "DATA_TYPE_UNKNOWN", - "DATA_TYPE_BOOLEAN", - "DATA_TYPE_BYTE", - "DATA_TYPE_INT16", - "DATA_TYPE_UINT16", - "DATA_TYPE_INT32", - "DATA_TYPE_UINT32", - "DATA_TYPE_INT64", - "DATA_TYPE_UINT64", - "DATA_TYPE_STRING", - "DATA_TYPE_BYTE_ARRAY", - "DATA_TYPE_INT16_ARRAY", - "DATA_TYPE_UINT16_ARRAY", - "DATA_TYPE_INT32_ARRAY", - "DATA_TYPE_UINT32_ARRAY", - "DATA_TYPE_INT64_ARRAY", - "DATA_TYPE_UINT64_ARRAY", - "DATA_TYPE_STRING_ARRAY", - "DATA_TYPE_HRTIME", - "DATA_TYPE_NVLIST", - "DATA_TYPE_NVLIST_ARRAY", - "DATA_TYPE_BOOLEAN_VALUE", - "DATA_TYPE_INT8", - "DATA_TYPE_UINT8", - "DATA_TYPE_BOOLEAN_ARRAY", - "DATA_TYPE_INT8_ARRAY", - "DATA_TYPE_UINT8_ARRAY" - }; - nvs_data_t *data; - nvp_header_t *nvp; nv_string_t *nvp_name; nv_pair_data_t *nvp_data; nvlist_t nvlist; - int i, j; + xdr_t xdr; + unsigned i, j, u; + uint64_t u64; + + nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof (*nvp)); + nvp_data = (nv_pair_data_t *) + NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + nvp_name->nv_size); + + for (i = 0; i < indent; i++) + printf(" "); + + printf("%s [%d] %.*s", typenames[nvp_data->nv_type], + nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); + + switch (nvp_data->nv_type) { + case DATA_TYPE_BYTE: + case DATA_TYPE_INT8: + case DATA_TYPE_UINT8: + bcopy(nvp_data->nv_data, &u, sizeof (u)); + printf(" = 0x%x\n", (unsigned char)u); + break; + + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + bcopy(nvp_data->nv_data, &u, sizeof (u)); + printf(" = 0x%hx\n", (unsigned short)u); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + bcopy(nvp_data->nv_data, &u, sizeof (u)); + printf(" = 0x%x\n", u); + break; + + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + bcopy(nvp_data->nv_data, &u64, sizeof (u64)); + printf(" = 0x%jx\n", (uintmax_t)u64); + break; + + case 
DATA_TYPE_STRING: + case DATA_TYPE_STRING_ARRAY: + nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; + for (i = 0; i < nvp_data->nv_nelem; i++) { + printf(" = \"%.*s\"\n", nvp_name->nv_size, + nvp_name->nv_data); + } + break; + + case DATA_TYPE_NVLIST: + printf("\n"); + nvlist.nv_data = &nvp_data->nv_data[0]; + nvlist_print(&nvlist, indent + 2); + break; + + case DATA_TYPE_NVLIST_ARRAY: + nvlist.nv_data = &nvp_data->nv_data[0]; + for (j = 0; j < nvp_data->nv_nelem; j++) { + size_t size; + + printf("[%d]\n", j); + nvlist_print(&nvlist, indent + 2); + if (j != nvp_data->nv_nelem - 1) { + for (i = 0; i < indent; i++) + printf(" "); + printf("%s %.*s", + typenames[nvp_data->nv_type], + nvp_name->nv_size, + nvp_name->nv_data); + } + xdr.xdr_idx = nvlist.nv_data; + xdr.xdr_buf = xdr.xdr_idx; + xdr.xdr_buf_size = nvp->encoded_size - + (xdr.xdr_idx - (uint8_t *)nvp); + + if (!nvlist_size_native(&xdr, &size)) + return; + + nvlist.nv_data += size; + } + break; + + default: + printf("\n"); + } +} + +void +nvlist_print(const nvlist_t *nvl, unsigned int indent) +{ + nvs_data_t *data; + nvp_header_t *nvp; data = (nvs_data_t *)nvl->nv_data; nvp = &data->nvl_pair; /* first pair in nvlist */ while (nvp->encoded_size != 0 && nvp->decoded_size != 0) { - nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp)); - nvp_data = (nv_pair_data_t *) - NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] + - nvp_name->nv_size); - - for (int i = 0; i < indent; i++) - printf(" "); - - printf("%s [%d] %.*s", typenames[nvp_data->nv_type], - nvp_data->nv_nelem, nvp_name->nv_size, nvp_name->nv_data); - - switch (nvp_data->nv_type) { - case DATA_TYPE_UINT64: { - uint64_t val; - - val = *(uint64_t *)nvp_data->nv_data; - printf(" = 0x%jx\n", (uintmax_t)val); - break; - } - - case DATA_TYPE_STRING: { - nvp_name = (nv_string_t *)&nvp_data->nv_data[0]; - printf(" = \"%.*s\"\n", nvp_name->nv_size, - nvp_name->nv_data ); - break; - } - - case DATA_TYPE_NVLIST: - printf("\n"); - nvlist.nv_data = 
&nvp_data->nv_data[0]; - nvlist_print(&nvlist, indent + 2); - break; - - case DATA_TYPE_NVLIST_ARRAY: - nvlist.nv_data = &nvp_data->nv_data[0]; - for (j = 0; j < nvp_data->nv_nelem; j++) { - data = (nvs_data_t *)nvlist.nv_data; - printf("[%d]\n", j); - nvlist_print(&nvlist, indent + 2); - if (j != nvp_data->nv_nelem - 1) { - for (i = 0; i < indent; i++) - printf(" "); - printf("%s %.*s", - typenames[nvp_data->nv_type], - nvp_name->nv_size, - nvp_name->nv_data); - } - nvlist.nv_data = (uint8_t *)data + - nvlist_size(&native, nvlist.nv_data); - } - break; - - default: - printf("\n"); - } + nvpair_print(nvp, indent); nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size); } printf("%*s\n", indent + 13, "End of nvlist"); diff --git a/stand/libsa/zfs/zfs.c b/stand/libsa/zfs/zfs.c index 6bceb1ee5270..ea0fdace680c 100644 --- a/stand/libsa/zfs/zfs.c +++ b/stand/libsa/zfs/zfs.c @@ -486,8 +486,7 @@ vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) } static int -vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, - size_t bytes) +vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes) { int fd, ret; size_t head, tail, total_size, full_sec_size; @@ -496,8 +495,8 @@ vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, ssize_t res; char *outbuf, *bouncebuf; - fd = (uintptr_t)priv; - outbuf = (char *) buf; + fd = (uintptr_t)vdev->v_priv; + outbuf = (char *)buf; bouncebuf = NULL; ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); @@ -532,14 +531,14 @@ vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, /* Partial data for first sector */ if (head > 0) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -555,20 +554,20 @@ 
vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, if (full_sec_size > 0) { if (bytes < full_sec_size) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, bytes); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } } else { res = write(fd, outbuf, full_sec_size); - if (res != full_sec_size) { + if ((unsigned)res != full_sec_size) { ret = EIO; goto error; } @@ -579,14 +578,14 @@ vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, /* Partial data write to last sector */ if (do_tail_write) { res = read(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } memcpy(bouncebuf, outbuf, secsz - tail); (void) lseek(fd, -secsz, SEEK_CUR); res = write(fd, bouncebuf, secsz); - if (res != secsz) { + if ((unsigned)res != secsz) { ret = EIO; goto error; } @@ -598,102 +597,6 @@ vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, return (ret); } -static void -vdev_clear_pad2(vdev_t *vdev) -{ - vdev_t *kid; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - zio_checksum_info_t *ci; - zio_cksum_t cksum; - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - vdev_clear_pad2(kid); - } - - if (!STAILQ_EMPTY(&vdev->v_children)) - return; - - be = calloc(1, sizeof (*be)); - if (be == NULL) { - printf("failed to clear be area: out of memory\n"); - return; - } - - ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; - be->vbe_zbt.zec_magic = ZEC_MAGIC; - zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); - ci->ci_func[0](be, sizeof (*be), NULL, &cksum); - be->vbe_zbt.zec_cksum = cksum; - - if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { - printf("failed to clear be area of primary vdev: %d\n", - errno); 
- } - free(be); -} - -/* - * Read the next boot command from pad2. - * If any instance of pad2 is set to empty string, or the returned string - * values are not the same, we consider next boot not to be set. - */ -static char * -vdev_read_pad2(vdev_t *vdev) -{ - vdev_t *kid; - char *tmp, *result = NULL; - vdev_boot_envblock_t *be; - off_t off = offsetof(vdev_label_t, vl_be); - - STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { - if (kid->v_state != VDEV_STATE_HEALTHY) - continue; - tmp = vdev_read_pad2(kid); - if (tmp == NULL) - continue; - - /* The next boot is not set, we are done. */ - if (*tmp == '\0') { - free(result); - return (tmp); - } - if (result == NULL) { - result = tmp; - continue; - } - /* Are the next boot strings different? */ - if (strcmp(result, tmp) != 0) { - free(tmp); - *result = '\0'; - break; - } - free(tmp); - } - if (result != NULL) - return (result); - - be = malloc(sizeof (*be)); - if (be == NULL) - return (NULL); - - if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { - return (NULL); - } - - switch (be->vbe_version) { - case VB_RAW: - case VB_NVLIST: - result = strdup(be->vbe_bootenv); - default: - /* Backward compatibility with initial nextboot feaure. 
*/ - result = strdup((char *)be); - } - return (result); -} - static int zfs_dev_init(void) { @@ -746,7 +649,7 @@ zfs_probe(int fd, uint64_t *pool_guid) int ret; spa = NULL; - ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); + ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); @@ -769,7 +672,7 @@ zfs_probe_partition(void *arg, const char *partname, ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; - sprintf(devname, "%s%s:", devname, partname); + snprintf(devname, sizeof(devname), "%s%s:", devname, partname); pa.fd = open(devname, O_RDWR); if (pa.fd == -1) return (0); @@ -792,57 +695,728 @@ zfs_probe_partition(void *arg, const char *partname, return (0); } +/* + * Return bootenv nvlist from pool label. + */ int -zfs_nextboot(void *vdev, char *buf, size_t size) +zfs_get_bootenv(void *vdev, nvlist_t **benvp) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + nvlist_t *benv = NULL; + vdev_t *vd; + spa_t *spa; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) { + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, + v_childlink) { + benv = vdev_read_bootenv(vd); + + if (benv != NULL) + break; + } + spa->spa_bootenv = benv; + } else { + benv = spa->spa_bootenv; + } + + if (benv == NULL) + return (ENOENT); + + *benvp = benv; + return (0); +} + +/* + * Store nvlist to pool label bootenv area. Also updates cached pointer in spa. 
+ */ +int +zfs_set_bootenv(void *vdev, nvlist_t *benv) { struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; vdev_t *vd; - char *result = NULL; if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (1); + return (ENOTSUP); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); - - if (spa == NULL) { - printf("ZFS: can't find pool by guid\n"); - return (1); - } + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - char *tmp = vdev_read_pad2(vd); - - /* Continue on error. */ - if (tmp == NULL) - continue; - /* Nextboot is not set. */ - if (*tmp == '\0') { - free(result); - free(tmp); - return (1); - } - if (result == NULL) { - result = tmp; - continue; - } - free(tmp); - } - if (result == NULL) - return (1); - - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - vdev_clear_pad2(vd); + vdev_write_bootenv(vd, benv); } - strlcpy(buf, result, size); - free(result); + spa->spa_bootenv = benv; return (0); } +/* + * Get bootonce value by key. The bootonce pair is removed + * from the bootenv nvlist and the remaining nvlist is committed back to disk. + */ +int +zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size) +{ + nvlist_t *benv; + char *result = NULL; + int result_size, rv; + + if ((rv = zfs_get_bootenv(vdev, &benv)) != 0) + return (rv); + + if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, + &result, &result_size)) == 0) { + if (result_size == 0) { + /* ignore empty string */ + rv = ENOENT; + } else { + size = MIN((size_t)result_size + 1, size); + strlcpy(buf, result, size); + } + (void) nvlist_remove(benv, key, DATA_TYPE_STRING); + (void) zfs_set_bootenv(vdev, benv); + } + + return (rv); +} + +/* + * nvstore backend. 
+ */ + +static int zfs_nvstore_setter(void *, int, const char *, + const void *, size_t); +static int zfs_nvstore_setter_str(void *, const char *, const char *, + const char *); +static int zfs_nvstore_unset_impl(void *, const char *, bool); +static int zfs_nvstore_setenv(void *, void *); + +/* + * nvstore is only present for current rootfs pool. + */ +static int +zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value) +{ + struct zfs_devdesc *dev; + int rv; + + archsw.arch_getdev((void **)&dev, NULL, NULL); + if (dev == NULL) + return (ENXIO); + + rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value); + + free(dev); + return (rv); +} + +/* + * nvstore is only present for current rootfs pool. + */ +static int +zfs_nvstore_unsethook(struct env_var *ev) +{ + struct zfs_devdesc *dev; + int rv; + + archsw.arch_getdev((void **)&dev, NULL, NULL); + if (dev == NULL) + return (ENXIO); + + rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false); + + free(dev); + return (rv); +} + +static int +zfs_nvstore_getter(void *vdev, const char *name, void **data) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + char *str, **ptr; + int size; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size); + if (rv == 0) { + ptr = (char **)data; + asprintf(ptr, "%.*s", size, str); + if (*data == NULL) + rv = ENOMEM; + } + nvlist_destroy(nv); + return (rv); +} + +static int +zfs_nvstore_setter(void *vdev, int type, const char *name, + const void *data, size_t size) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + int rv; + bool env_set = true; + + if (dev->dd.d_dev->dv_type != 
DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) { + nv = nvlist_create(NV_UNIQUE_NAME); + if (nv == NULL) + return (ENOMEM); + } + + rv = 0; + switch (type) { + case DATA_TYPE_INT8: + if (size != sizeof (int8_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_int8(nv, name, *(int8_t *)data); + break; + + case DATA_TYPE_INT16: + if (size != sizeof (int16_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_int16(nv, name, *(int16_t *)data); + break; + + case DATA_TYPE_INT32: + if (size != sizeof (int32_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_int32(nv, name, *(int32_t *)data); + break; + + case DATA_TYPE_INT64: + if (size != sizeof (int64_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_int64(nv, name, *(int64_t *)data); + break; + + case DATA_TYPE_BYTE: + if (size != sizeof (uint8_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_byte(nv, name, *(int8_t *)data); + break; + + case DATA_TYPE_UINT8: + if (size != sizeof (uint8_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_uint8(nv, name, *(int8_t *)data); + break; + + case DATA_TYPE_UINT16: + if (size != sizeof (uint16_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_uint16(nv, name, *(uint16_t *)data); + break; + + case DATA_TYPE_UINT32: + if (size != sizeof (uint32_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_uint32(nv, name, *(uint32_t *)data); + break; + + case DATA_TYPE_UINT64: + if (size != sizeof (uint64_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_uint64(nv, name, *(uint64_t *)data); + break; + + case DATA_TYPE_STRING: + rv = nvlist_add_string(nv, name, data); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + if (size != sizeof (boolean_t)) { + rv = EINVAL; + break; + } + rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data); + break; + + default: + rv = EINVAL; + break; + } + + if 
(rv == 0) { + rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv); + if (rv == 0) { + rv = zfs_set_bootenv(vdev, spa->spa_bootenv); + } + if (rv == 0) { + if (env_set) { + rv = zfs_nvstore_setenv(vdev, + nvpair_find(nv, name)); + } else { + env_discard(env_getenv(name)); + rv = 0; + } + } + } + + nvlist_destroy(nv); + return (rv); +} + +static int +get_int64(const char *data, int64_t *ip) +{ + char *end; + int64_t val; + + errno = 0; + val = strtoll(data, &end, 0); + if (errno != 0 || *data == '\0' || *end != '\0') + return (EINVAL); + + *ip = val; + return (0); +} + +static int +get_uint64(const char *data, uint64_t *ip) +{ + char *end; + uint64_t val; + + errno = 0; + val = strtoull(data, &end, 0); + if (errno != 0 || *data == '\0' || *end != '\0') + return (EINVAL); + + *ip = val; + return (0); +} + +/* + * Translate textual data to data type. If type is not set, and we are + * creating new pair, use DATA_TYPE_STRING. + */ +static int +zfs_nvstore_setter_str(void *vdev, const char *type, const char *name, + const char *data) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + int rv; + data_type_t dt; + int64_t val; + uint64_t uval; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) { + nv = NULL; + } + + if (type == NULL) { + nvp_header_t *nvh; + + /* + * if there is no existing pair, default to string. + * Otherwise, use type from existing pair. 
+ */ + nvh = nvpair_find(nv, name); + if (nvh == NULL) { + dt = DATA_TYPE_STRING; + } else { + nv_string_t *nvp_name; + nv_pair_data_t *nvp_data; + + nvp_name = (nv_string_t *)(nvh + 1); + nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + + NV_ALIGN4(nvp_name->nv_size)); + dt = nvp_data->nv_type; + } + } else { + dt = nvpair_type_from_name(type); + } + nvlist_destroy(nv); + + rv = 0; + switch (dt) { + case DATA_TYPE_INT8: + rv = get_int64(data, &val); + if (rv == 0) { + int8_t v = val; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + case DATA_TYPE_INT16: + rv = get_int64(data, &val); + if (rv == 0) { + int16_t v = val; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + case DATA_TYPE_INT32: + rv = get_int64(data, &val); + if (rv == 0) { + int32_t v = val; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + case DATA_TYPE_INT64: + rv = get_int64(data, &val); + if (rv == 0) { + rv = zfs_nvstore_setter(vdev, dt, name, &val, + sizeof (val)); + } + break; + + case DATA_TYPE_BYTE: + rv = get_uint64(data, &uval); + if (rv == 0) { + uint8_t v = uval; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + + case DATA_TYPE_UINT8: + rv = get_uint64(data, &uval); + if (rv == 0) { + uint8_t v = uval; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + + case DATA_TYPE_UINT16: + rv = get_uint64(data, &uval); + if (rv == 0) { + uint16_t v = uval; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + + case DATA_TYPE_UINT32: + rv = get_uint64(data, &uval); + if (rv == 0) { + uint32_t v = uval; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + + case DATA_TYPE_UINT64: + rv = get_uint64(data, &uval); + if (rv == 0) { + rv = zfs_nvstore_setter(vdev, dt, name, &uval, + sizeof (uval)); + } + break; + + case DATA_TYPE_STRING: + rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1); + break; + + case 
DATA_TYPE_BOOLEAN_VALUE: + rv = get_int64(data, &val); + if (rv == 0) { + boolean_t v = val; + + rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v)); + } + break; + default: + rv = EINVAL; + } + return (rv); +} + +static int +zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN); + if (rv == 0) { + if (nvlist_next_nvpair(nv, NULL) == NULL) { + rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE, + DATA_TYPE_NVLIST); + } else { + rv = nvlist_add_nvlist(spa->spa_bootenv, + OS_NVSTORE, nv); + } + if (rv == 0) + rv = zfs_set_bootenv(vdev, spa->spa_bootenv); + } + + if (unset_env) + env_discard(env_getenv(name)); + return (rv); +} + +static int +zfs_nvstore_unset(void *vdev, const char *name) +{ + return (zfs_nvstore_unset_impl(vdev, name, true)); +} + +static int +zfs_nvstore_print(void *vdev __unused, void *ptr) +{ + + nvpair_print(ptr, 0); + return (0); +} + +/* + * Create environment variable from nvpair. + * set hook will update nvstore with new value, unset hook will remove + * variable from nvstore. 
+ */ +static int +zfs_nvstore_setenv(void *vdev __unused, void *ptr) +{ + nvp_header_t *nvh = ptr; + nv_string_t *nvp_name, *nvp_value; + nv_pair_data_t *nvp_data; + char *name, *value; + int rv = 0; + + if (nvh == NULL) + return (ENOENT); + + nvp_name = (nv_string_t *)(nvh + 1); + nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] + + NV_ALIGN4(nvp_name->nv_size)); + + if ((name = nvstring_get(nvp_name)) == NULL) + return (ENOMEM); + + value = NULL; + switch (nvp_data->nv_type) { + case DATA_TYPE_BYTE: + case DATA_TYPE_UINT8: + (void) asprintf(&value, "%uc", + *(unsigned *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_INT8: + (void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_INT16: + (void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_UINT16: + (void) asprintf(&value, "%hu", + *(unsigned short *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_BOOLEAN_VALUE: + case DATA_TYPE_INT32: + (void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_UINT32: + (void) asprintf(&value, "%u", + *(unsigned *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_INT64: + (void) asprintf(&value, "%jd", + (intmax_t)*(int64_t *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_UINT64: + (void) asprintf(&value, "%ju", + (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]); + if (value == NULL) + rv = ENOMEM; + break; + + case DATA_TYPE_STRING: + nvp_value = (nv_string_t *)&nvp_data->nv_data[0]; + if ((value = nvstring_get(nvp_value)) == NULL) { + rv = ENOMEM; + break; + } + break; + + default: + rv = EINVAL; + break; + } + + if (value != NULL) { + rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value, + 
zfs_nvstore_sethook, zfs_nvstore_unsethook); + free(value); + } + free(name); + return (rv); +} + +static int +zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *)) +{ + struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; + spa_t *spa; + nvlist_t *nv; + nvp_header_t *nvh; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + if (spa->spa_bootenv == NULL) + return (ENXIO); + + if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST, + NULL, &nv, NULL) != 0) + return (ENOENT); + + rv = 0; + nvh = NULL; + while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) { + rv = cb(vdev, nvh); + if (rv != 0) + break; + } + return (rv); +} + +nvs_callbacks_t nvstore_zfs_cb = { + .nvs_getter = zfs_nvstore_getter, + .nvs_setter = zfs_nvstore_setter, + .nvs_setter_str = zfs_nvstore_setter_str, + .nvs_unset = zfs_nvstore_unset, + .nvs_print = zfs_nvstore_print, + .nvs_iterate = zfs_nvstore_iterate +}; + +int +zfs_attach_nvstore(void *vdev) +{ + struct zfs_devdesc *dev = vdev; + spa_t *spa; + uint64_t version; + int rv; + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (ENOTSUP); + + if ((spa = spa_find_by_dev(dev)) == NULL) + return (ENXIO); + + rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64, + NULL, &version, NULL); + + if (rv != 0 || version != VB_NVLIST) { + return (ENXIO); + } + + dev = malloc(sizeof (*dev)); + if (dev == NULL) + return (ENOMEM); + memcpy(dev, vdev, sizeof (*dev)); + + rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev); + if (rv != 0) + free(dev); + else + rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv); + return (rv); +} + int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { @@ -939,12 +1513,9 @@ zfs_dev_open(struct open_file *f, ...) 
dev = va_arg(args, struct zfs_devdesc *); va_end(args); - if (dev->pool_guid == 0) - spa = STAILQ_FIRST(&zfs_pools); - else - spa = spa_find_by_guid(dev->pool_guid); - if (!spa) + if ((spa = spa_find_by_dev(dev)) == NULL) return (ENXIO); + mount = malloc(sizeof(*mount)); if (mount == NULL) rv = ENOMEM; @@ -1073,10 +1644,11 @@ zfs_fmtdev(void *vdev) } if (rootname[0] == '\0') - sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); + snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name, + spa->spa_name); else - sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, - rootname); + snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name, + spa->spa_name, rootname); return (buf); } diff --git a/stand/libsa/zfs/zfsimpl.c b/stand/libsa/zfs/zfsimpl.c index d8dd88e930cf..8b5cbc2feea9 100644 --- a/stand/libsa/zfs/zfsimpl.c +++ b/stand/libsa/zfs/zfsimpl.c @@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$"); * Stand-alone ZFS file reader. */ +#include #include #include #include #include +#include #include #include "zfsimpl.h" @@ -220,8 +222,8 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, size_t psize; int rc; - if (!vdev->v_phys_read) - return (EIO); + if (vdev->v_phys_read == NULL) + return (ENOTSUP); if (bp) { psize = BP_GET_PSIZE(bp); @@ -229,7 +231,7 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, psize = size; } - rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); + rc = vdev->v_phys_read(vdev, vdev->v_priv, offset, buf, psize); if (rc == 0) { if (bp != NULL) rc = zio_checksum_verify(vdev->v_spa, bp, buf); @@ -238,6 +240,15 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, return (rc); } +static int +vdev_write_phys(vdev_t *vdev, void *buf, off_t offset, size_t size) +{ + if (vdev->v_phys_write == NULL) + return (ENOTSUP); + + return (vdev->v_phys_write(vdev, offset, buf, size)); +} + typedef struct remap_segment { vdev_t *rs_vd; uint64_t rs_offset; @@ -1084,7 +1095,7 @@ static int 
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *top_vdev, *vdev; - nvlist_t *kids = NULL; + nvlist_t **kids = NULL; int rc, nkids; /* Get top vdev. */ @@ -1105,27 +1116,18 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) for (int i = 0; i < nkids; i++) { uint64_t guid; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } - rc = vdev_init(guid, kids, &vdev); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } + if (rc != 0) + goto done; + + rc = vdev_init(guid, kids[i], &vdev); + if (rc != 0) + goto done; vdev->v_spa = spa; vdev->v_top = top_vdev; vdev_insert(top_vdev, vdev); - - rc = nvlist_next(kids); - if (rc != 0) { - nvlist_destroy(kids); - return (rc); - } } } else { /* @@ -1134,7 +1136,12 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) */ rc = 0; } - nvlist_destroy(kids); +done: + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } return (rc); } @@ -1210,7 +1217,7 @@ static int vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *vdev; - nvlist_t *kids = NULL; + nvlist_t **kids = NULL; int rc, nkids; /* Update top vdev. 
*/ @@ -1225,23 +1232,23 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) for (int i = 0; i < nkids; i++) { uint64_t guid; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; vdev = vdev_find(guid); if (vdev != NULL) - vdev_set_initial_state(vdev, kids); - - rc = nvlist_next(kids); - if (rc != 0) - break; + vdev_set_initial_state(vdev, kids[i]); } } else { rc = 0; } - nvlist_destroy(kids); + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } return (rc); } @@ -1250,7 +1257,7 @@ static int vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) { uint64_t pool_guid, vdev_children; - nvlist_t *vdevs = NULL, *kids = NULL; + nvlist_t *vdevs = NULL, **kids = NULL; int rc, nkids; if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, @@ -1285,7 +1292,7 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) uint64_t guid; vdev_t *vdev; - rc = nvlist_find(kids, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + rc = nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, NULL, &guid, NULL); if (rc != 0) break; @@ -1294,16 +1301,17 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) * Top level vdev is missing, create it. */ if (vdev == NULL) - rc = vdev_from_nvlist(spa, guid, kids); + rc = vdev_from_nvlist(spa, guid, kids[i]); else - rc = vdev_update_from_nvlist(guid, kids); - if (rc != 0) - break; - rc = nvlist_next(kids); + rc = vdev_update_from_nvlist(guid, kids[i]); if (rc != 0) break; } - nvlist_destroy(kids); + if (kids != NULL) { + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + } /* * Re-evaluate top-level vdev state. 
@@ -1337,6 +1345,19 @@ spa_find_by_name(const char *name) return (NULL); } +static spa_t * +spa_find_by_dev(struct zfs_devdesc *dev) +{ + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (NULL); + + if (dev->pool_guid == 0) + return (STAILQ_FIRST(&zfs_pools)); + + return (spa_find_by_guid(dev->pool_guid)); +} + static spa_t * spa_create(uint64_t guid, const char *name) { @@ -1589,6 +1610,254 @@ vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset, return (vdev_read_phys(vd, &bp, buf, off, size)); } +/* + * We do need to be sure we write to correct location. + * Our vdev label does consist of 4 fields: + * pad1 (8k), reserved. + * bootenv (8k), checksummed, previously reserved, may contain garbage. + * vdev_phys (112k), checksummed + * uberblock ring (128k), checksummed. + * + * Since bootenv area may contain garbage, we can not reliably read it, as + * we can get checksum errors. + * Next best thing is vdev_phys - it is just after bootenv. It still may + * be corrupted, but in such case we will miss this one write. 
+ */ +static int +vdev_label_write_validate(vdev_t *vd, int l, uint64_t offset) +{ + uint64_t off, o_phys; + void *buf; + size_t size = VDEV_PHYS_SIZE; + int rc; + + o_phys = offsetof(vdev_label_t, vl_vdev_phys); + off = vdev_label_offset(vd->v_psize, l, o_phys); + + /* off should be 8K from bootenv */ + if (vdev_label_offset(vd->v_psize, l, offset) + VDEV_PAD_SIZE != off) + return (EINVAL); + + buf = malloc(size); + if (buf == NULL) + return (ENOMEM); + + /* Read vdev_phys */ + rc = vdev_label_read(vd, l, buf, o_phys, size); + free(buf); + return (rc); +} + +static int +vdev_label_write(vdev_t *vd, int l, vdev_boot_envblock_t *be, uint64_t offset) +{ + zio_checksum_info_t *ci; + zio_cksum_t cksum; + off_t off; + size_t size = VDEV_PAD_SIZE; + int rc; + + if (vd->v_phys_write == NULL) + return (ENOTSUP); + + off = vdev_label_offset(vd->v_psize, l, offset); + + rc = vdev_label_write_validate(vd, l, offset); + if (rc != 0) { + return (rc); + } + + ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; + be->vbe_zbt.zec_magic = ZEC_MAGIC; + zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); + ci->ci_func[0](be, size, NULL, &cksum); + be->vbe_zbt.zec_cksum = cksum; + + return (vdev_write_phys(vd, be, off, size)); +} + +static int +vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) +{ + vdev_t *kid; + int rv = 0, rc; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + rc = vdev_write_bootenv_impl(kid, be); + if (rv == 0) + rv = rc; + } + + /* + * Non-leaf vdevs do not have v_phys_write. + */ + if (vdev->v_phys_write == NULL) + return (rv); + + for (int l = 0; l < VDEV_LABELS; l++) { + rc = vdev_label_write(vdev, l, be, + offsetof(vdev_label_t, vl_be)); + if (rc != 0) { + printf("failed to write bootenv to %s label %d: %d\n", + vdev->v_name ? 
vdev->v_name : "unknown", l, rc); + rv = rc; + } + } + return (rv); +} + +int +vdev_write_bootenv(vdev_t *vdev, nvlist_t *nvl) +{ + vdev_boot_envblock_t *be; + nvlist_t nv, *nvp; + uint64_t version; + int rv; + + if (nvl->nv_size > sizeof(be->vbe_bootenv)) + return (E2BIG); + + version = VB_RAW; + nvp = vdev_read_bootenv(vdev); + if (nvp != NULL) { + nvlist_find(nvp, BOOTENV_VERSION, DATA_TYPE_UINT64, NULL, + &version, NULL); + nvlist_destroy(nvp); + } + + be = calloc(1, sizeof(*be)); + if (be == NULL) + return (ENOMEM); + + be->vbe_version = version; + switch (version) { + case VB_RAW: + /* + * If there is no envmap, we will just wipe bootenv. + */ + nvlist_find(nvl, GRUB_ENVMAP, DATA_TYPE_STRING, NULL, + be->vbe_bootenv, NULL); + rv = 0; + break; + + case VB_NVLIST: + nv.nv_header = nvl->nv_header; + nv.nv_asize = nvl->nv_asize; + nv.nv_size = nvl->nv_size; + + bcopy(&nv.nv_header, be->vbe_bootenv, sizeof(nv.nv_header)); + nv.nv_data = be->vbe_bootenv + sizeof(nvs_header_t); + bcopy(nvl->nv_data, nv.nv_data, nv.nv_size); + rv = nvlist_export(&nv); + break; + + default: + rv = EINVAL; + break; + } + + if (rv == 0) { + be->vbe_version = htobe64(be->vbe_version); + rv = vdev_write_bootenv_impl(vdev, be); + } + free(be); + return (rv); +} + +/* + * Read the bootenv area from pool label, return the nvlist from it. + * We return from first successful read. 
+ */ +nvlist_t * +vdev_read_bootenv(vdev_t *vdev) +{ + vdev_t *kid; + nvlist_t *benv; + vdev_boot_envblock_t *be; + char *command; + bool ok; + int rv; + + STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { + if (kid->v_state != VDEV_STATE_HEALTHY) + continue; + + benv = vdev_read_bootenv(kid); + if (benv != NULL) + return (benv); + } + + be = malloc(sizeof (*be)); + if (be == NULL) + return (NULL); + + rv = 0; + for (int l = 0; l < VDEV_LABELS; l++) { + rv = vdev_label_read(vdev, l, be, + offsetof(vdev_label_t, vl_be), + sizeof (*be)); + if (rv == 0) + break; + } + if (rv != 0) { + free(be); + return (NULL); + } + + be->vbe_version = be64toh(be->vbe_version); + switch (be->vbe_version) { + case VB_RAW: + /* + * we have textual data in vbe_bootenv, create nvlist + * with key "envmap". + */ + benv = nvlist_create(NV_UNIQUE_NAME); + if (benv != NULL) { + if (*be->vbe_bootenv == '\0') { + nvlist_add_uint64(benv, BOOTENV_VERSION, + VB_NVLIST); + break; + } + nvlist_add_uint64(benv, BOOTENV_VERSION, VB_RAW); + be->vbe_bootenv[sizeof (be->vbe_bootenv) - 1] = '\0'; + nvlist_add_string(benv, GRUB_ENVMAP, be->vbe_bootenv); + } + break; + + case VB_NVLIST: + benv = nvlist_import(be->vbe_bootenv, sizeof(be->vbe_bootenv)); + break; + + default: + command = (char *)be; + ok = false; + + /* Check for legacy zfsbootcfg command string */ + for (int i = 0; command[i] != '\0'; i++) { + if (iscntrl(command[i])) { + ok = false; + break; + } else { + ok = true; + } + } + benv = nvlist_create(NV_UNIQUE_NAME); + if (benv != NULL) { + if (ok) + nvlist_add_string(benv, FREEBSD_BOOTONCE, + command); + else + nvlist_add_uint64(benv, BOOTENV_VERSION, + VB_NVLIST); + } + break; + } + free(be); + return (benv); +} + static uint64_t vdev_get_label_asize(nvlist_t *nvl) { @@ -1621,7 +1890,7 @@ vdev_get_label_asize(nvlist_t *nvl) goto done; if (memcmp(type, VDEV_TYPE_RAIDZ, len) == 0) { - nvlist_t *kids; + nvlist_t **kids; int nkids; if (nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, @@ -1631,7 
+1900,9 @@ vdev_get_label_asize(nvlist_t *nvl) } asize /= nkids; - nvlist_destroy(kids); + for (int i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); } asize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; @@ -1655,15 +1926,13 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) return (NULL); for (int l = 0; l < VDEV_LABELS; l++) { - const unsigned char *nvlist; - if (vdev_label_read(vd, l, label, offsetof(vdev_label_t, vl_vdev_phys), sizeof (vdev_phys_t))) continue; - nvlist = (const unsigned char *) label->vp_nvlist; - tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]); + tmp = nvlist_import(label->vp_nvlist, + sizeof(label->vp_nvlist)); if (tmp == NULL) continue; @@ -1728,7 +1997,8 @@ vdev_uberblock_load(vdev_t *vd, uberblock_t *ub) } static int -vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) +vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, + spa_t **spap) { vdev_t vtmp; spa_t *spa; @@ -1746,8 +2016,9 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = _read; - vtmp.v_read_priv = read_priv; - vtmp.v_psize = P2ALIGN(ldi_get_size(read_priv), + vtmp.v_phys_write = _write; + vtmp.v_priv = priv; + vtmp.v_psize = P2ALIGN(ldi_get_size(priv), (uint64_t)sizeof (vdev_label_t)); /* Test for minimum device size. */ @@ -1861,7 +2132,8 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap) vdev = vdev_find(guid); if (vdev != NULL) { vdev->v_phys_read = _read; - vdev->v_read_priv = read_priv; + vdev->v_phys_write = _write; + vdev->v_priv = priv; vdev->v_psize = vtmp.v_psize; /* * If no other state is set, mark vdev healthy. 
@@ -3132,7 +3404,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) dnode_phys_t dir; size_t size; int rc; - unsigned char *nv; + char *nv; *value = NULL; if ((rc = objset_get_dnode(spa, spa->spa_mos, obj, &dir)) != 0) @@ -3156,7 +3428,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) nv = NULL; return (rc); } - *value = nvlist_import(nv + 4, nv[0], nv[1]); + *value = nvlist_import(nv, size); free(nv); return (rc); } diff --git a/stand/loader.mk b/stand/loader.mk index 59fcb568c22c..f6341052ab71 100644 --- a/stand/loader.mk +++ b/stand/loader.mk @@ -6,7 +6,7 @@ CFLAGS+=-I${LDRSRC} SRCS+= boot.c commands.c console.c devopen.c interp.c SRCS+= interp_backslash.c interp_parse.c ls.c misc.c -SRCS+= module.c +SRCS+= module.c nvstore.c .if ${MACHINE} == "i386" || ${MACHINE_CPUARCH} == "amd64" SRCS+= load_elf32.c load_elf32_obj.c reloc_elf32.c diff --git a/stand/lua/config.lua b/stand/lua/config.lua index 1934fe795f06..6a898ce8cfb5 100644 --- a/stand/lua/config.lua +++ b/stand/lua/config.lua @@ -389,16 +389,24 @@ end local function checkNextboot() local nextboot_file = loader.getenv("nextboot_conf") + local nextboot_enable = loader.getenv("nextboot_enable") + if nextboot_file == nil then return end + -- is nextboot_enable set in nvstore? 
+ if nextboot_enable == "NO" then + return + end + local text = readFile(nextboot_file, true) if text == nil then return end - if text:match("^nextboot_enable=\"NO\"") ~= nil then + if nextboot_enable == nil and + text:match("^nextboot_enable=\"NO\"") ~= nil then -- We're done; nextboot is not enabled return end @@ -421,6 +429,7 @@ local function checkNextboot() io.write(nfile, "nextboot_enable=\"NO\" ") io.close(nfile) end + loader.setenv("nextboot_enable", "NO") end -- Module exports diff --git a/stand/userboot/test/test.c b/stand/userboot/test/test.c index 301069a4d953..baf1b6243c1f 100644 --- a/stand/userboot/test/test.c +++ b/stand/userboot/test/test.c @@ -260,6 +260,21 @@ test_diskread(void *arg, int unit, uint64_t offset, void *dst, size_t size, return (0); } +int +test_diskwrite(void *arg, int unit, uint64_t offset, void *src, size_t size, + size_t *resid_return) +{ + ssize_t n; + + if (unit > disk_index || disk_fd[unit] == -1) + return (EIO); + n = pwrite(disk_fd[unit], src, size, offset); + if (n < 0) + return (errno); + *resid_return = size - n; + return (0); +} + int test_diskioctl(void *arg, int unit, u_long cmd, void *data) { @@ -399,6 +414,7 @@ struct loader_callbacks cb = { .stat = test_stat, .diskread = test_diskread, + .diskwrite = test_diskwrite, .diskioctl = test_diskioctl, .copyin = test_copyin, @@ -431,8 +447,9 @@ main(int argc, char** argv) void (*func)(struct loader_callbacks *, void *, int, int) __dead2; int opt; const char *userboot_obj = "/boot/userboot.so"; + int oflag = O_RDONLY; - while ((opt = getopt(argc, argv, "b:d:h:")) != -1) { + while ((opt = getopt(argc, argv, "wb:d:h:")) != -1) { switch (opt) { case 'b': userboot_obj = optarg; @@ -442,7 +459,7 @@ main(int argc, char** argv) disk_index++; disk_fd = reallocarray(disk_fd, disk_index + 1, sizeof (int)); - disk_fd[disk_index] = open(optarg, O_RDONLY); + disk_fd[disk_index] = open(optarg, oflag); if (disk_fd[disk_index] < 0) err(1, "Can't open disk image '%s'", optarg); break; @@ 
-451,6 +468,10 @@ main(int argc, char** argv) host_base = optarg; break; + case 'w': + oflag = O_RDWR; + break; + case '?': usage(); } diff --git a/stand/userboot/userboot.h b/stand/userboot/userboot.h index de0cdb6605c8..810e5b9d3d24 100644 --- a/stand/userboot/userboot.h +++ b/stand/userboot/userboot.h @@ -131,6 +131,12 @@ struct loader_callbacks { int (*diskread)(void *arg, int unit, uint64_t offset, void *dst, size_t size, size_t *resid_return); + /* + * Write to a disk image at the given offset + */ + int (*diskwrite)(void *arg, int unit, uint64_t offset, + void *src, size_t size, size_t *resid_return); + /* * Guest virtual machine i/o */ diff --git a/stand/userboot/userboot/Makefile b/stand/userboot/userboot/Makefile index a6e884be1040..f256ac2769e5 100644 --- a/stand/userboot/userboot/Makefile +++ b/stand/userboot/userboot/Makefile @@ -32,8 +32,9 @@ SRCS+= vers.c CFLAGS+= -Wall CFLAGS+= -I${BOOTSRC}/userboot - -CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common +CFLAGS.main.c+= -I${BOOTSRC}/libsa/zfs +CFLAGS.main.c+= -I${SYSDIR}/contrib/openzfs/include +CFLAGS.main.c+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs CWARNFLAGS.main.c += -Wno-implicit-function-declaration LDFLAGS+= -nostdlib -Wl,-Bsymbolic diff --git a/stand/userboot/userboot/main.c b/stand/userboot/userboot/main.c index 82851982fbcb..c094e987b0cb 100644 --- a/stand/userboot/userboot/main.c +++ b/stand/userboot/userboot/main.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "bootstrap.h" #include "disk.h" @@ -214,6 +215,16 @@ loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks) exit(0); } +static void +set_currdev(const char *devname) +{ + + env_setenv("currdev", EV_VOLATILE, devname, + userboot_setcurrdev, env_nounset); + env_setenv("loaddev", EV_VOLATILE, devname, + env_noset, env_nounset); +} + /* * Set the 'current device' by (if possible) recovering the boot device as * supplied by the initial bootstrap. 
@@ -225,6 +236,7 @@ extract_currdev(void) struct devdesc *dd; #if defined(USERBOOT_ZFS_SUPPORT) struct zfs_devdesc zdev; + char *buf = NULL; if (userboot_zfs_found) { @@ -257,10 +269,23 @@ extract_currdev(void) dd = &dev.dd; } - env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(dd), - userboot_setcurrdev, env_nounset); - env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(dd), - env_noset, env_nounset); + set_currdev(userboot_fmtdev(dd)); + +#if defined(USERBOOT_ZFS_SUPPORT) + if (userboot_zfs_found) { + buf = malloc(VDEV_PAD_SIZE); + if (buf != NULL) { + if (zfs_get_bootonce(&zdev, OS_BOOTONCE, buf, + VDEV_PAD_SIZE) == 0) { + printf("zfs bootonce: %s\n", buf); + set_currdev(buf); + setenv("zfs-bootonce", buf, 1); + } + free(buf); + (void) zfs_attach_nvstore(&zdev); + } + } +#endif } #if defined(USERBOOT_ZFS_SUPPORT) diff --git a/stand/userboot/userboot/userboot_disk.c b/stand/userboot/userboot/userboot_disk.c index a4214997007e..caaa3373812b 100644 --- a/stand/userboot/userboot/userboot_disk.c +++ b/stand/userboot/userboot/userboot_disk.c @@ -211,15 +211,21 @@ userdisk_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size, size_t resid; int rc; - rw &= F_MASK; - if (rw == F_WRITE) - return (EROFS); - if (rw != F_READ) - return (EINVAL); if (rsize) *rsize = 0; off = dblk * ud_info[dev->dd.d_unit].sectorsize; - rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + switch (rw & F_MASK) { + case F_READ: + rc = CALLBACK(diskread, dev->dd.d_unit, off, buf, size, &resid); + break; + case F_WRITE: + rc = CALLBACK(diskwrite, dev->dd.d_unit, off, buf, size, + &resid); + break; + default: + rc = EINVAL; + break; + } if (rc) return (rc); if (rsize) diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index 6a6331450e8b..a005f892508f 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -527,20 +527,20 @@ typedef struct vdev_phys { } vdev_phys_t; typedef enum vbe_vers { - /* The bootenv file is stored as ascii text 
in the envblock */ - VB_RAW = 0, + /* The bootenv file is stored as ascii text in the envblock */ + VB_RAW = 0, - /* - * The bootenv file is converted to an nvlist and then packed into the - * envblock. - */ - VB_NVLIST = 1 + /* + * The bootenv file is converted to an nvlist and then packed into the + * envblock. + */ + VB_NVLIST = 1 } vbe_vers_t; typedef struct vdev_boot_envblock { - uint64_t vbe_version; - char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - - sizeof (zio_eck_t)]; + uint64_t vbe_version; + char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - + sizeof (zio_eck_t)]; zio_eck_t vbe_zbt; } vdev_boot_envblock_t; @@ -1663,10 +1663,9 @@ typedef struct znode_phys { */ struct vdev; struct spa; -typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, - off_t offset, void *buf, size_t bytes); -typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, - void *buf, off_t offset, size_t bytes); +typedef int vdev_phys_read_t(struct vdev *, void *, off_t, void *, size_t); +typedef int vdev_phys_write_t(struct vdev *, off_t, void *, size_t); +typedef int vdev_read_t(struct vdev *, const blkptr_t *, void *, off_t, size_t); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; @@ -1794,8 +1793,9 @@ typedef struct vdev { size_t v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ + vdev_phys_write_t *v_phys_write; /* write to raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ - void *v_read_priv; /* private data for read function */ + void *v_priv; /* data for read/write function */ boolean_t v_islog; struct spa *v_spa; /* link to spa */ /* @@ -1822,10 +1822,11 @@ typedef struct spa { void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS]; boolean_t spa_with_log; /* this pool has log */ - struct uberblock spa_uberblock_master; /* best uberblock so far */ - objset_phys_t spa_mos_master; /* MOS for this pool */ - struct uberblock spa_uberblock_checkpoint; /* checkpoint uberblock */ - 
objset_phys_t spa_mos_checkpoint; /* Checkpoint MOS */ + struct uberblock spa_uberblock_master; /* best uberblock so far */ + objset_phys_t spa_mos_master; /* MOS for this pool */ + struct uberblock spa_uberblock_checkpoint; /* checkpoint uberblock */ + objset_phys_t spa_mos_checkpoint; /* Checkpoint MOS */ + void *spa_bootenv; /* bootenv from pool label */ } spa_t; /* IO related arguments. */ diff --git a/tools/tools/zfsboottest/zfsboottest.c b/tools/tools/zfsboottest/zfsboottest.c index 7406ddb77918..88f946e9d3fe 100644 --- a/tools/tools/zfsboottest/zfsboottest.c +++ b/tools/tools/zfsboottest/zfsboottest.c @@ -147,7 +147,7 @@ main(int argc, char** argv) warn("open(%s) failed", argv[i]); continue; } - if (vdev_probe(vdev_read, &fd[i - 1], NULL) != 0) { + if (vdev_probe(vdev_read, NULL, &fd[i - 1], NULL) != 0) { warnx("vdev_probe(%s) failed", argv[i]); close(fd[i - 1]); }