freebsd-nq/sys/boot/i386/zfsboot/zfsboot.c
Andriy Gapon 97371ba2a9 zfsbootcfg: a simple tool to set next boot (one time) options for zfsboot
(gpt)zfsboot will read one-time boot directives from a special ZFS pool
area.  The area was previously described as "Boot Block Header", but
currently it is known as Pad2, marked as reserved and is zeroed out on
pool creation.  The new code interprets data in this area, if any, using
the same format as boot.config.  The area is immediately wiped out.
Failure to parse the directives results in a reboot right after the
cleanup.  Otherwise the boot sequence proceeds as usual.

zfsbootcfg writes zfsboot arguments specified on its command line to the
Pad2 area of a disk identified by vfs.zfs.boot.primary_pool and
vfs.zfs.boot.primary_vdev kenv variables that are set by loader during
boot.  Please see the manual page for more.

Thanks to all who reviewed, contributed and made suggestions!  There are
many potential improvements to the feature, please see the review for
details.

Reviewed by:	wblock (docs)
Discussed with:	jhb, tsoome
MFC after:	3 weeks
Relnotes:	yes
Differential Revision: https://reviews.freebsd.org/D7612
2016-10-29 14:09:32 +00:00

1073 lines
24 KiB
C

/*-
* Copyright (c) 1998 Robert Nordier
* All rights reserved.
*
* Redistribution and use in source and binary forms are freely
* permitted provided that the above copyright notice and this
* paragraph and the following disclaimer are duplicated in all
* such forms.
*
* This software is provided "AS IS" and without any express or
* implied warranties, including, without limitation, the implied
* warranties of merchantability and fitness for a particular
* purpose.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/diskmbr.h>
#ifdef GPT
#include <sys/gpt.h>
#endif
#include <sys/reboot.h>
#include <sys/queue.h>
#include <machine/bootinfo.h>
#include <machine/elf.h>
#include <machine/pc/bios.h>
#include <stdarg.h>
#include <stddef.h>
#include <a.out.h>
#include <btxv86.h>
#include "lib.h"
#include "rbx.h"
#include "drv.h"
#include "util.h"
#include "cons.h"
#include "bootargs.h"
#include "paths.h"
#include "libzfs.h"
/*
 * Physical address of the boot arguments.  main() reads the byte at ARGS
 * as the BIOS number of the boot drive and the byte at ARGS + 1 as the
 * boot slice (stored as slice + 1).
 */
#define ARGS 0x900
/* Number of single-letter boot options; sizes optstr[] and flags[]. */
#define NOPT 14
/* Number of device types; sizes dev_maj[]. */
#define NDEV 3
/*
 * Physical address of the byte that main() uses as the number of hard
 * drives to probe (unless VIRTUALBOX is defined).
 */
#define BIOS_NUMDRIVES 0x475
/*
 * DRV_HARD is the bit OR'ed into a unit number to form a hard disk BIOS
 * drive number; DRV_MASK extracts the unit from a drive number.
 */
#define DRV_HARD 0x80
#define DRV_MASK 0x7f
/* Values stored in dsk->type; main() also uses them to index dev_maj[]. */
#define TYPE_AD 0
#define TYPE_DA 1
#define TYPE_MAXHARD TYPE_DA
#define TYPE_FD 2
/* Alignment, in bytes, that vdev_read() applies to its disk reads. */
#define DEV_GELIBOOT_BSIZE 4096
/* main() places the DMA buffers at the first 64k boundary past &_end. */
extern uint32_t _end;
#ifdef GPT
/* GPT partition type that probe_drive() looks for in each table entry. */
static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
#endif
/*
 * Option letters accepted by parse().  The array holds exactly NOPT
 * characters and therefore has no terminating NUL.  flags[i] is the RBX_*
 * flag that parse() toggles when it sees optstr[i].
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
RBX_DUAL,
RBX_SERIAL,
RBX_ASKNAME,
RBX_CDROM,
RBX_CONFIG,
RBX_KDB,
RBX_GDB,
RBX_MUTE,
RBX_NOINTR,
RBX_PAUSE,
RBX_QUIET,
RBX_DFLTROOT,
RBX_SINGLE,
RBX_VERBOSE
};
/* Boot option bits toggled by parse(); load() passes them on masked with RBX_MASK. */
uint32_t opts;
/* Indexed by dsk->type when main() builds bootdev. */
static const unsigned char dev_maj[NDEV] = {30, 4, 2};
/* Command line that parse() consumes; parse() modifies it in place. */
static char cmd[512];
/* Copy of cmd[] taken before parsing so that it can still be printed. */
static char cmddup[512];
/* Path of the file that load() will try to execute. */
static char kname[1024];
/* Filled in by zfs_rlookup() for the "Default:" prompt line. */
static char rootname[256];
/* Serial console speed; changed by the -S option in parse(). */
static int comspeed = SIOSPD;
/* Handed to the loaded image by load(). */
static struct bootinfo bootinfo;
static uint32_t bootdev;
static struct zfs_boot_args zfsargs;
/* The currently mounted dataset; updated by zfs_mount(). */
static struct zfsmount zfsmount;
/* Memory layout discovered by bios_getmem(). */
vm_offset_t high_heap_base;
uint32_t bios_basemem, bios_extmem, high_heap_size;
/* Receives one entry of the system memory map per BIOS call in bios_getmem(). */
static struct bios_smap smap;
/*
 * The minimum amount of memory to reserve in bios_extmem for the heap.
 */
#define HEAP_MIN (64 * 1024 * 1024)
/* Bounds of the bump allocator: malloc() hands out [heap_next, heap_end). */
static char *heap_next;
static char *heap_end;
/*
 * Buffers that must not span a 64k boundary.  main() points dmadat at a
 * 64k-aligned address past the end of the image.
 */
#define READ_BUF_SIZE 8192
struct dmadat {
char rdbuf[READ_BUF_SIZE]; /* for reading large things */
char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */
};
static struct dmadat *dmadat;
/* Forward declarations for functions defined later in this file. */
void exit(int);
void reboot(void);
static void load(void);
static int parse(void);
static void bios_getmem(void);
/* Local replacements for the allocator; see the definitions below. */
void *malloc(size_t n);
void free(void *ptr);
/*
 * Minimal bump allocator: hand out the next n bytes of the region
 * [heap_next, heap_end).  Memory is never reclaimed (free() is a no-op).
 *
 * This function never returns NULL.  If the request does not fit it
 * prints "malloc failure" and spins forever.
 */
void *
malloc(size_t n)
{
	char *p = heap_next;

	/*
	 * Compare the request against the space that is left instead of
	 * forming p + n, which can wrap around for a large n and wrongly
	 * pass the bound check.
	 */
	if (p > heap_end || n > (size_t)(heap_end - p)) {
		printf("malloc failure\n");
		for (;;)
			;
		/* NOTREACHED */
	}
	heap_next = p + n;
	return (p);
}
/*
 * Counterpart of the bump allocator above: releasing memory does nothing,
 * the pointer is simply ignored.
 */
void
free(void *ptr)
{
	(void)ptr;
}
/*
 * Return a freshly allocated copy of the NUL-terminated string s,
 * including its terminator.  The result of malloc() is used unchecked.
 */
static char *
strdup(const char *s)
{
	size_t len = strlen(s) + 1;
	char *copy = malloc(len);

	memcpy(copy, s, len);
	return (copy);
}
#ifdef LOADER_GELI_SUPPORT
#include "geliboot.c"
/*
 * Passphrase buffer passed to geli_passphrase() by probe_drive(); load()
 * copies it into zfsargs and then zeroes it.
 */
static char gelipw[GELI_PW_MAXLEN];
#endif
/* The ZFS implementation is compiled directly into this translation unit. */
#include "zfsimpl.c"
/*
 * Read from a dnode (which must be from a ZPL filesystem).
 *
 * Reads up to size bytes starting at *offp into start, clamped to the
 * file size recorded in the znode stored in the dnode's bonus buffer.
 * On success *offp is advanced and the number of bytes read is returned;
 * this is 0 when *offp is at or beyond the end of the file, or negative.
 * Returns -1 if dnode_read() fails.
 */
static int
zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
{
	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
	size_t n;
	int rc;

	/*
	 * An offset outside the file must not reach the subtraction below:
	 * zp_size - *offp would underflow into a huge length and
	 * dnode_read() would be asked to copy far past the caller's buffer.
	 */
	if (*offp < 0 || (uint64_t)*offp >= zp->zp_size)
		return (0);

	n = size;
	if (n > zp->zp_size - (uint64_t)*offp)
		n = zp->zp_size - (uint64_t)*offp;

	rc = dnode_read(spa, dnode, *offp, start, n);
	if (rc)
		return (-1);
	*offp += n;

	return (n);
}
/*
 * Current ZFS pool.  It starts out as the primary pool and is replaced by
 * zfs_mount_ds() when a command names a different pool.
 */
static spa_t *spa;
/* The pool returned by spa_get_primary() in main(), and its vdev as returned by spa_get_primary_vdev(). */
static spa_t *primary_spa;
static vdev_t *primary_vdev;
/*
 * A wrapper for drvread that doesn't have to worry about whether the
 * buffer pointer crosses a 64k boundary: every transfer goes through
 * dmadat->rdbuf and is then copied into the caller's buffer.
 *
 * vdev is unused.  priv is the struct dsk to read from.  off and bytes
 * are byte counts and must both be multiples of DEV_BSIZE.
 *
 * Returns 0 on success, -1 on a misaligned request or when a read (or,
 * with LOADER_GELI_SUPPORT, a decryption) fails.
 */
static int
vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
{
char *p;
daddr_t lba, alignlba;
off_t diff;
unsigned int nb, alignnb;
struct dsk *dsk = (struct dsk *) priv;
if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
return -1;
p = buf;
lba = off / DEV_BSIZE;
lba += dsk->start;
/*
 * Align reads to 4k else 4k sector GELIs will not decrypt.
 * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes.
 */
alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE;
/*
 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the
 * start of the GELI partition, not the start of the actual disk.
 */
alignlba += dsk->start;
/* Bytes between the aligned start of the read and the data actually wanted. */
diff = (lba - alignlba) * DEV_BSIZE;
while (bytes > 0) {
nb = bytes / DEV_BSIZE;
/*
 * Ensure that the read size plus the leading offset does not
 * exceed the size of the read buffer.
 */
if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE)
nb = (READ_BUF_SIZE - diff) / DEV_BSIZE;
/*
 * Round the number of blocks to read up to the nearest multiple
 * of DEV_GELIBOOT_BSIZE.
 */
alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE)
/ DEV_BSIZE;
if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb))
return -1;
#ifdef LOADER_GELI_SUPPORT
/* decrypt */
if (is_geli(dsk) == 0) {
/* The offset handed to geli_read() is relative to dsk->start. */
if (geli_read(dsk, ((alignlba - dsk->start) *
DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE))
return (-1);
}
#endif
/* Skip the alignment padding at the front of the bounce buffer. */
memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE);
p += nb * DEV_BSIZE;
lba += nb;
alignlba += alignnb;
bytes -= nb * DEV_BSIZE;
/* Don't need the leading offset after the first block. */
diff = 0;
}
return 0;
}
/*
 * Write bytes from buf to the disk described by priv, starting at byte
 * offset off relative to the disk's start sector.  The data is staged
 * through dmadat->rdbuf, at most READ_BUF_SIZE bytes per drvwrite() call.
 *
 * vdev is unused.  off and bytes must be multiples of DEV_BSIZE.
 * Returns 0 on success, -1 on a misaligned request or a failed write.
 */
static int
vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
{
	struct dsk *disk = (struct dsk *) priv;
	char *src = buf;
	daddr_t sector;
	unsigned int count;

	if ((off & (DEV_BSIZE - 1)) != 0 || (bytes & (DEV_BSIZE - 1)) != 0)
		return -1;

	sector = off / DEV_BSIZE;
	sector += disk->start;

	for (; bytes > 0; bytes -= count * DEV_BSIZE) {
		/* Never stage more than the bounce buffer can hold. */
		count = bytes / DEV_BSIZE;
		if (count > READ_BUF_SIZE / DEV_BSIZE)
			count = READ_BUF_SIZE / DEV_BSIZE;

		memcpy(dmadat->rdbuf, src, count * DEV_BSIZE);
		if (drvwrite(disk, dmadat->rdbuf, sector, count))
			return -1;

		src += count * DEV_BSIZE;
		sector += count;
	}

	return 0;
}
/*
 * Read exactly nbyte bytes from dnode on the current pool into buf,
 * advancing *offp.  Anything other than a full read is reported as
 * "Invalid format" and yields -1; a full read yields 0.
 */
static int
xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
{
	size_t got = (size_t)zfs_read(spa, dnode, offp, buf, nbyte);

	if (got == nbyte)
		return 0;

	printf("Invalid format\n");
	return -1;
}
/*
 * Read the Pad2 (formerly "Boot Block Header") area of the first vdev
 * label of the given vdev.
 *
 * The whole VDEV_PAD_SIZE area is read into zap_scratch through a block
 * pointer that requests the label checksum and no compression; then at
 * most size bytes (capped at VDEV_PAD_SIZE) are copied into buf.
 * Returns 0 on success, or EIO if vdev_read_phys() fails, in which case
 * buf is left untouched.
 */
static int
vdev_read_pad2(vdev_t *vdev, char *buf, size_t size)
{
	off_t pad2_off = offsetof(vdev_label_t, vl_pad2);
	char *scratch = zap_scratch;
	blkptr_t bp;
	size_t ncopy;

	ncopy = (size > VDEV_PAD_SIZE) ? VDEV_PAD_SIZE : size;

	BP_ZERO(&bp);
	BP_SET_LSIZE(&bp, VDEV_PAD_SIZE);
	BP_SET_PSIZE(&bp, VDEV_PAD_SIZE);
	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
	BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
	DVA_SET_OFFSET(BP_IDENTITY(&bp), pad2_off);

	if (vdev_read_phys(vdev, &bp, scratch, pad2_off, 0) != 0)
		return (EIO);

	memcpy(buf, scratch, ncopy);
	return (0);
}
/*
 * Overwrite the Pad2 area of the first label of vdev with zeroes.
 *
 * The block is built in zap_scratch.  Its last five 64-bit words are set
 * to the ZIO_CHECKSUM_LABEL magic followed by the pre-calculated checksum
 * of an otherwise all-zero block.  The block is then written through
 * vdev_write() using vdev->v_read_priv as the disk handle.
 * Returns 0 on success, EIO if the write fails.
 */
static int
vdev_clear_pad2(vdev_t *vdev)
{
	off_t pad2_off = offsetof(vdev_label_t, vl_pad2);
	char *block = zap_scratch;
	uint64_t *trailer;

	memset(block, 0, VDEV_PAD_SIZE);

	/* The trailer occupies the final five 64-bit words of the block. */
	trailer = (uint64_t *)(block + VDEV_PAD_SIZE) - 5;
	trailer[0] = 0x0210da7ab10c7a11;
	trailer[1] = 0x97f48f807f6e2a3f;
	trailer[2] = 0xaf909f1658aacefc;
	trailer[3] = 0xcbd1ea57ff6db48b;
	trailer[4] = 0x6ec692db0d465fab;

	return (vdev_write(vdev, vdev->v_read_priv, pad2_off, block,
	    VDEV_PAD_SIZE) ? EIO : 0);
}
/*
 * Query the BIOS for the memory layout and record it in bios_basemem,
 * bios_extmem, high_heap_base and high_heap_size.
 *
 * The int 0x15/0xe820 system memory map is walked first; older BIOS
 * calls are used as fallbacks for whatever the map did not provide.
 */
static void
bios_getmem(void)
{
	uint64_t seg_len;

	/* Parse system memory map */
	v86.ebx = 0;
	do {
		v86.ctl = V86_FLAGS;
		v86.addr = 0x15;		/* int 0x15 function 0xe820 */
		v86.eax = 0xe820;
		v86.ecx = sizeof(struct bios_smap);
		v86.edx = SMAP_SIG;
		v86.es = VTOPSEG(&smap);
		v86.edi = VTOPOFF(&smap);
		v86int();
		if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
			break;

		/* Only usable memory is of interest. */
		if (smap.type != SMAP_TYPE_MEMORY)
			continue;

		if (smap.base == 0) {
			/* a low-memory segment that's large enough */
			if (smap.length >= (512 * 1024))
				bios_basemem = smap.length;
		} else if (smap.base == 0x100000) {
			/* the first segment in 'extended' memory */
			bios_extmem = smap.length;
		} else if (smap.base > 0x100000 &&
		    smap.base < 0x100000000ull) {
			/*
			 * Track the largest segment beyond 1MB but below
			 * 4GB, truncating one that crosses the 4GB boundary.
			 */
			seg_len = smap.length;
			if (smap.base + seg_len > 0x100000000ull)
				seg_len = 0x100000000ull - smap.base;
			if (seg_len > high_heap_size) {
				high_heap_size = seg_len;
				high_heap_base = smap.base;
			}
		}
	} while (v86.ebx != 0);

	/* Fall back to the old compatibility function for base memory */
	if (bios_basemem == 0) {
		v86.ctl = 0;
		v86.addr = 0x12;		/* int 0x12 */
		v86int();

		bios_basemem = (v86.eax & 0xffff) * 1024;
	}

	/* Fall back through several compatibility functions for extended memory */
	if (bios_extmem == 0) {
		v86.ctl = V86_FLAGS;
		v86.addr = 0x15;		/* int 0x15 function 0xe801 */
		v86.eax = 0xe801;
		v86int();
		if (!V86_CY(v86.efl))
			bios_extmem = ((v86.ecx & 0xffff) +
			    ((v86.edx & 0xffff) * 64)) * 1024;
	}
	if (bios_extmem == 0) {
		v86.ctl = 0;
		v86.addr = 0x15;		/* int 0x15 function 0x88 */
		v86.eax = 0x8800;
		v86int();
		bios_extmem = (v86.eax & 0xffff) * 1024;
	}

	/*
	 * If we have extended memory and did not find a suitable heap
	 * region in the SMAP, use the last HEAP_MIN bytes of 'extended'
	 * memory as a high heap candidate.
	 */
	if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
		high_heap_size = HEAP_MIN;
		high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
	}
}
/*
 * Try to detect a device supported by the legacy int13 BIOS.
 *
 * Issues int 0x13 with eax = 0x800 for the given drive number.
 * Returns 1 if the device looks usable, 0 if it should be skipped.
 */
static int
int13probe(int drive)
{
	v86.ctl = V86_FLAGS;
	v86.addr = 0x13;
	v86.eax = 0x800;
	v86.edx = drive;
	v86int();

	/* The call must succeed (carry clear). */
	if (V86_CY(v86.efl))
		return (0);

	/*
	 * Unit number check.
	 * NOTE(review): this rejects the drive only when the low byte of
	 * edx equals the unit number; confirm that an inequality rather
	 * than a "greater than" comparison is what is intended here.
	 */
	if ((v86.edx & 0xff) == (drive & DRV_MASK))
		return (0);

	/* A sector count of zero is absurd; skip such a device. */
	return ((v86.ecx & 0x3f) != 0);
}
/*
 * We call this when we find a ZFS vdev - ZFS consumes the dsk
 * structure so we must make a new one.  Returns a newly allocated
 * structure holding a copy of *dsk.
 */
static struct dsk *
copy_dsk(struct dsk *dsk)
{
	struct dsk *clone = malloc(sizeof(*clone));

	memcpy(clone, dsk, sizeof(*clone));
	return (clone);
}
/*
 * Look for ZFS vdevs on the disk described by dsk.
 *
 * The whole disk is probed first.  If that fails, the partition table is
 * examined (GPT when compiled with GPT, otherwise or as a fallback the
 * MBR) and each candidate partition is probed in turn.  With
 * LOADER_GELI_SUPPORT, a device that does not probe as a vdev is also
 * tasted for GELI and probed again after a passphrase was accepted.
 *
 * Whenever vdev_probe() succeeds the vdev keeps the dsk structure it was
 * given, so a fresh copy is made before scanning continues.
 */
static void
probe_drive(struct dsk *dsk)
{
#ifdef GPT
struct gpt_hdr hdr;
struct gpt_ent *ent;
unsigned part, entries_per_sec;
daddr_t slba;
#endif
#if defined(GPT) || defined(LOADER_GELI_SUPPORT)
daddr_t elba;
#endif
struct dos_partition *dp;
char *sec;
unsigned i;
/*
 * If we find a vdev on the whole disk, stop here.
 */
if (vdev_probe(vdev_read, dsk, NULL) == 0)
return;
#ifdef LOADER_GELI_SUPPORT
/*
 * Taste the disk, if it is GELI encrypted, decrypt it and check to see if
 * it is a usable vdev then. Otherwise dig
 * out the partition table and probe each slice/partition
 * in turn for a vdev or GELI encrypted vdev.
 */
/* Here elba becomes the disk size minus one (when the size is non-zero). */
elba = drvsize(dsk);
if (elba > 0) {
elba--;
}
if (geli_taste(vdev_read, dsk, elba) == 0) {
if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) {
if (vdev_probe(vdev_read, dsk, NULL) == 0) {
return;
}
}
}
#endif /* LOADER_GELI_SUPPORT */
sec = dmadat->secbuf;
dsk->start = 0;
#ifdef GPT
/*
 * First check for GPT.
 */
if (drvread(dsk, sec, 1, 1)) {
return;
}
memcpy(&hdr, sec, sizeof(hdr));
/*
 * Fall back to the MBR unless the header carries the GPT signature, sits
 * at LBA 1, has a supported revision and an entry size that is at least
 * sizeof(struct gpt_ent) and divides DEV_BSIZE evenly.
 */
if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
goto trymbr;
}
/*
 * Probe all GPT partitions for the presence of ZFS pools. We
 * return the spa_t for the first we find (if requested). This
 * will have the effect of booting from the first pool on the
 * disk.
 *
 * If no vdev is found, try GELI-decrypting the device and probe again.
 */
entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
slba = hdr.hdr_lba_table;
elba = slba + hdr.hdr_entries / entries_per_sec;
/* Walk the partition table one sector at a time. */
while (slba < elba) {
/* The table is addressed relative to the start of the disk. */
dsk->start = 0;
if (drvread(dsk, sec, slba, 1))
return;
for (part = 0; part < entries_per_sec; part++) {
ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
sizeof(uuid_t)) == 0) {
dsk->start = ent->ent_lba_start;
dsk->slice = part + 1;
dsk->part = 255;
if (vdev_probe(vdev_read, dsk, NULL) == 0) {
/*
 * This slice had a vdev. We need a new dsk
 * structure now since the vdev now owns this one.
 */
dsk = copy_dsk(dsk);
}
#ifdef LOADER_GELI_SUPPORT
else if (geli_taste(vdev_read, dsk, ent->ent_lba_end -
ent->ent_lba_start) == 0) {
if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) {
/*
 * This slice has GELI, check it for ZFS.
 */
if (vdev_probe(vdev_read, dsk, NULL) == 0) {
/*
 * This slice had a vdev. We need a new dsk
 * structure now since the vdev now owns this one.
 */
dsk = copy_dsk(dsk);
}
/* Leaves only the loop over the entries of the current sector. */
break;
}
}
#endif /* LOADER_GELI_SUPPORT */
}
}
slba++;
}
return;
trymbr:
#endif /* GPT */
if (drvread(dsk, sec, DOSBBSECTOR, 1))
return;
dp = (void *)(sec + DOSPARTOFF);
for (i = 0; i < NDOSPART; i++) {
/* Skip unused table entries. */
if (!dp[i].dp_typ)
continue;
dsk->start = dp[i].dp_start;
dsk->slice = i + 1;
if (vdev_probe(vdev_read, dsk, NULL) == 0) {
dsk = copy_dsk(dsk);
}
#ifdef LOADER_GELI_SUPPORT
/*
 * NOTE(review): the GPT path above passes (end - start) of the
 * partition to geli_taste() and dsk->slice to geli_passphrase(); this
 * path passes dp_size - dp_start and i (dsk->slice is i + 1).  Confirm
 * against the callees which values are expected.
 */
else if (geli_taste(vdev_read, dsk, dp[i].dp_size -
dp[i].dp_start) == 0) {
if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) {
/*
 * This slice has GELI, check it for ZFS.
 */
if (vdev_probe(vdev_read, dsk, NULL) == 0) {
/*
 * This slice had a vdev. We need a new dsk
 * structure now since the vdev now owns this one.
 */
dsk = copy_dsk(dsk);
}
break;
}
}
#endif /* LOADER_GELI_SUPPORT */
}
}
/*
 * Boot block entry point.
 *
 * Sets up the DMA buffers and the heap, probes the boot drive and then
 * the remaining BIOS hard drives for ZFS vdevs, and selects the primary
 * pool.  One-time "nextboot" directives are read from (and then wiped
 * out of) the Pad2 area of the primary vdev; after that the boot.config
 * file of the mounted dataset is processed.  Finally the loader is tried
 * and, failing that, the user is presented with the boot prompt.
 *
 * This function does not return.
 */
int
main(void)
{
	dnode_phys_t dn;
	off_t off;
	struct dsk *dsk;
	int autoboot, i;
	int nextboot;
	int rc;

	/* Place the DMA buffers at the first 64k boundary past _end. */
	dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);

	bios_getmem();

	/*
	 * Prefer the high heap found by bios_getmem(); otherwise use what
	 * is left of base memory after the DMA buffers.
	 */
	if (high_heap_size > 0) {
		heap_end = PTOV(high_heap_base + high_heap_size);
		heap_next = PTOV(high_heap_base);
	} else {
		heap_next = (char *)dmadat + sizeof(*dmadat);
		heap_end = (char *)PTOV(bios_basemem);
	}

	/* Describe the boot drive using the arguments left at ARGS. */
	dsk = malloc(sizeof(struct dsk));
	dsk->drive = *(uint8_t *)PTOV(ARGS);
	dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
	dsk->unit = dsk->drive & DRV_MASK;
	dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
	dsk->part = 0;
	dsk->start = 0;
	dsk->init = 0;

	bootinfo.bi_version = BOOTINFO_VERSION;
	bootinfo.bi_size = sizeof(bootinfo);
	bootinfo.bi_basemem = bios_basemem / 1024;
	bootinfo.bi_extmem = bios_extmem / 1024;
	bootinfo.bi_memsizes_valid++;
	bootinfo.bi_bios_dev = dsk->drive;

	bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
	    dsk->slice, dsk->unit, dsk->part);

	/* Process configuration file */

	autoboot = 1;

#ifdef LOADER_GELI_SUPPORT
	geli_init();
#endif
	zfs_init();

	/*
	 * Probe the boot drive first - we will try to boot from whatever
	 * pool we find on that drive.
	 */
	probe_drive(dsk);

	/*
	 * Probe the rest of the drives that the bios knows about. This
	 * will find any other available pools and it may fill in missing
	 * vdevs for the boot pool.
	 */
#ifndef VIRTUALBOX
	for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
#else
	for (i = 0; i < MAXBDDEV; i++)
#endif
	{
		/* The boot drive has been probed already. */
		if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
			continue;

		if (!int13probe(i | DRV_HARD))
			break;

		dsk = malloc(sizeof(struct dsk));
		dsk->drive = i | DRV_HARD;
		/* TYPE_AD is 0, so this always yields TYPE_AD. */
		dsk->type = dsk->drive & TYPE_AD;
		dsk->unit = i;
		dsk->slice = 0;
		dsk->part = 0;
		dsk->start = 0;
		dsk->init = 0;
		probe_drive(dsk);
	}

	/*
	 * The first discovered pool, if any, is the pool.
	 */
	spa = spa_get_primary();
	if (!spa) {
		printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
		for (;;)
			;
	}

	primary_spa = spa;
	primary_vdev = spa_get_primary_vdev(spa);

	nextboot = 0;
	/*
	 * load() allows for primary_vdev being NULL, so do not hand a NULL
	 * vdev to the pad2 helpers, which dereference it.  The missing
	 * vdev is reported as a failed read below.
	 */
	rc = EIO;
	if (primary_vdev != NULL) {
		rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd));
		if (vdev_clear_pad2(primary_vdev))
			printf("failed to clear pad2 area of primary vdev\n");
	}
	if (rc == 0) {
		/*
		 * The area is raw disk contents; make sure that neither
		 * parse() nor printf() can run off the end of the buffer.
		 */
		cmd[sizeof(cmd) - 1] = '\0';
		if (*cmd) {
			/*
			 * We could find an old-style ZFS Boot Block header
			 * here.  Simply ignore it.
			 */
			if (*(uint64_t *)cmd != 0x2f5b007b10c) {
				/*
				 * Note that parse() is destructive to cmd[]
				 * and we also want to honor RBX_QUIET option
				 * that could be present in cmd[].
				 */
				nextboot = 1;
				memcpy(cmddup, cmd, sizeof(cmd));
				if (parse()) {
					printf("failed to parse pad2 area of primary vdev\n");
					reboot();
				}
				if (!OPT_CHECK(RBX_QUIET))
					printf("zfs nextboot: %s\n", cmddup);
			}
		}
	} else
		printf("failed to read pad2 area of primary vdev\n");

	/*
	 * Do not process this command twice.  The whole buffer is wiped,
	 * not just its first byte: a boot.config shorter than the pad2
	 * contents would otherwise be followed by leftover pad2 bytes that
	 * parse() would go on to interpret.
	 */
	memset(cmd, 0, sizeof(cmd));

	/* Mount ZFS only if it's not already mounted via nextboot parsing. */
	if (zfsmount.spa == NULL &&
	    (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) {
		printf("%s: failed to mount default pool %s\n",
		    BOOTPROG, spa->spa_name);
		autoboot = 0;
	} else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
	    zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
		off = 0;
		/*
		 * cmd[] is all zeroes at this point; leaving its last byte
		 * alone keeps the contents NUL-terminated even for a file
		 * that is larger than the buffer.
		 */
		zfs_read(spa, &dn, &off, cmd, sizeof(cmd) - 1);
	}

	if (*cmd) {
		/*
		 * Note that parse() is destructive to cmd[] and we also want
		 * to honor RBX_QUIET option that could be present in cmd[].
		 */
		memcpy(cmddup, cmd, sizeof(cmd));
		if (parse())
			autoboot = 0;
		if (!OPT_CHECK(RBX_QUIET))
			printf("%s: %s\n", PATH_CONFIG, cmddup);
		/* Do not process this command twice */
		*cmd = 0;
	}

	/* Do not risk waiting at the prompt forever. */
	if (nextboot && !autoboot)
		reboot();

	/*
	 * Try to exec /boot/loader. If interrupted by a keypress,
	 * or in case of failure, try to load a kernel directly instead.
	 */
	if (autoboot && !*kname) {
		memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
		if (!keyhit(3)) {
			load();
			memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
		}
	}

	/* Present the user with the boot2 prompt. */
	for (;;) {
		if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
			printf("\nFreeBSD/x86 boot\n");
			if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
				printf("Default: %s/<0x%llx>:%s\n"
				    "boot: ",
				    spa->spa_name, zfsmount.rootobj, kname);
			else if (rootname[0] != '\0')
				printf("Default: %s/%s:%s\n"
				    "boot: ",
				    spa->spa_name, rootname, kname);
			else
				printf("Default: %s:%s\n"
				    "boot: ",
				    spa->spa_name, kname);
		}
		if (ioctrl & IO_SERIAL)
			sio_flush();
		if (!autoboot || keyhit(5))
			getstr(cmd, sizeof(cmd));
		else if (!autoboot || !OPT_CHECK(RBX_QUIET))
			putchar('\n');
		autoboot = 0;
		if (parse())
			putchar('\a');
		else
			load();
	}
}
/*
 * XXX - btxld needs this symbol in order to link the boot2 binary; do
 * not remove.  The argument is passed straight on to __exit().
 */
void
exit(int x)
{
	__exit(x);
}
/*
 * Leave the boot block by calling __exit() with a status of 0.  main()
 * uses this when one-time boot directives cannot be honored.
 */
void
reboot(void)
{
	__exit(0);
}
/*
 * Load the file named by kname from the mounted dataset and execute it.
 *
 * Both a.out (ZMAGIC) and ELF images are recognized.  The image is copied
 * to the physical addresses given by its headers (masked to 24 bits),
 * followed by its symbol information, and is then entered through
 * __exec() with bootinfo and the ZFS boot arguments.
 *
 * The function returns only on failure: a missing file, a short read or
 * an unrecognized format.
 */
static void
load(void)
{
union {
struct exec ex;
Elf32_Ehdr eh;
} hdr;
static Elf32_Phdr ep[2];
static Elf32_Shdr es[2];
caddr_t p;
dnode_phys_t dn;
off_t off;
uint32_t addr, x;
int fmt, i, j;
if (zfs_lookup(&zfsmount, kname, &dn)) {
printf("\nCan't find %s\n", kname);
return;
}
off = 0;
if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
return;
/* fmt: 0 for a.out, 1 for ELF. */
if (N_GETMAGIC(hdr.ex) == ZMAGIC)
fmt = 0;
else if (IS_ELF(hdr.eh))
fmt = 1;
else {
printf("Invalid %s\n", "format");
return;
}
if (fmt == 0) {
addr = hdr.ex.a_entry & 0xffffff;
p = PTOV(addr);
/* The text is read starting at file offset PAGE_SIZE. */
off = PAGE_SIZE;
if (xfsread(&dn, &off, p, hdr.ex.a_text))
return;
p += roundup2(hdr.ex.a_text, PAGE_SIZE);
if (xfsread(&dn, &off, p, hdr.ex.a_data))
return;
p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
/* The symbol area starts with the value of a_syms, followed by the symbols. */
bootinfo.bi_symtab = VTOP(p);
memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
p += sizeof(hdr.ex.a_syms);
if (hdr.ex.a_syms) {
if (xfsread(&dn, &off, p, hdr.ex.a_syms))
return;
p += hdr.ex.a_syms;
/*
 * The next word in the file is a size that includes the word itself;
 * read it, then read the remaining x bytes that follow it.
 */
if (xfsread(&dn, &off, p, sizeof(int)))
return;
x = *(uint32_t *)p;
p += sizeof(int);
x -= sizeof(int);
if (xfsread(&dn, &off, p, x))
return;
p += x;
}
} else {
/* Collect the first two PT_LOAD program headers into ep[]. */
off = hdr.eh.e_phoff;
for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
return;
if (ep[j].p_type == PT_LOAD)
j++;
}
/*
 * NOTE(review): both ep[] entries are loaded regardless of how many
 * PT_LOAD headers were found above (j is not consulted), and ep[] is
 * static so it keeps its contents between calls - confirm that images
 * with fewer than two loadable segments cannot occur here.
 */
for (i = 0; i < 2; i++) {
p = PTOV(ep[i].p_paddr & 0xffffff);
off = ep[i].p_offset;
if (xfsread(&dn, &off, p, ep[i].p_filesz))
return;
}
p += roundup2(ep[1].p_memsz, PAGE_SIZE);
bootinfo.bi_symtab = VTOP(p);
/*
 * Symbols are loaded only when exactly two section headers follow the
 * one at index e_shstrndx; each of the two sections is stored as its
 * size followed by its contents.
 */
if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
off = hdr.eh.e_shoff + sizeof(es[0]) *
(hdr.eh.e_shstrndx + 1);
if (xfsread(&dn, &off, &es, sizeof(es)))
return;
for (i = 0; i < 2; i++) {
memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
p += sizeof(es[i].sh_size);
off = es[i].sh_offset;
if (xfsread(&dn, &off, p, es[i].sh_size))
return;
p += es[i].sh_size;
}
}
addr = hdr.eh.e_entry & 0xffffff;
}
bootinfo.bi_esymtab = VTOP(p);
bootinfo.bi_kernelname = VTOP(kname);
/* Describe the mounted dataset and the primary pool to the next stage. */
zfsargs.size = sizeof(zfsargs);
zfsargs.pool = zfsmount.spa->spa_guid;
zfsargs.root = zfsmount.rootobj;
zfsargs.primary_pool = primary_spa->spa_guid;
#ifdef LOADER_GELI_SUPPORT
/* Hand over the passphrase and wipe the local copy. */
bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw));
bzero(gelipw, sizeof(gelipw));
#else
zfsargs.gelipw[0] = '\0';
#endif
if (primary_vdev != NULL)
zfsargs.primary_vdev = primary_vdev->v_guid;
else
printf("failed to detect primary vdev\n");
/* The pool GUID is passed as two 32-bit halves, low half first. */
__exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
bootdev,
KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
(uint32_t) spa->spa_guid,
(uint32_t) (spa->spa_guid >> 32),
VTOP(&bootinfo),
zfsargs);
}
/*
 * Mount the dataset named by dsname, which has the form "pool" or
 * "pool/dataset", and make its pool the current one.
 *
 * dsname is modified in place: the first '/' is replaced by a NUL to
 * split the pool name from the dataset name.  Without a dataset part,
 * root object 0 is passed to zfs_mount().
 *
 * Returns 0 on success.  Returns -1 if the pool cannot be found or
 * initialized, or the dataset cannot be found or mounted; the current
 * pool is left unchanged in that case.
 */
static int
zfs_mount_ds(char *dsname)
{
	spa_t *pool;
	uint64_t rootobj = 0;
	char *dataset;

	dataset = strchr(dsname, '/');
	if (dataset != NULL)
		*dataset++ = '\0';

	pool = spa_find_by_name(dsname);
	if (pool == NULL) {
		printf("\nCan't find ZFS pool %s\n", dsname);
		return -1;
	}

	if (zfs_spa_init(pool) != 0)
		return -1;

	if (dataset != NULL &&
	    zfs_lookup_dataset(pool, dataset, &rootobj) != 0) {
		printf("\nCan't find dataset %s in ZFS pool %s\n",
		    dataset, pool->spa_name);
		return -1;
	}

	if (zfs_mount(pool, rootobj, &zfsmount) != 0) {
		printf("\nCan't mount ZFS dataset\n");
		return -1;
	}

	spa = pool;
	return (0);
}
/*
 * Parse the command line in cmd[].
 *
 * The line is split into words at blanks, tabs and newlines; cmd[] is
 * modified in place while doing so.  Each word is one of:
 *
 *   -<letters>   boot options: each letter from optstr[] toggles the
 *                matching flag in opts, 'P' probes for a keyboard and
 *                'S<number>' sets the serial console speed;
 *   ?<path>      list the directory <path> on the mounted dataset;
 *   status       report the status of all pools;
 *   [zfs:][<pool>[/<dataset>]:][<path>]
 *                optionally switch to another pool/dataset and set the
 *                name of the file to load.
 *
 * Returns 0 if the whole line was accepted.  Returns -1 on an unknown
 * option, a failed pool/dataset switch or an over-long file name, and
 * also after the '?' and "status" commands, so that callers do not go on
 * to load anything.
 */
static int
parse(void)
{
	char *arg = cmd;
	char *ep, *p, *q;
	const char *cp;
	int c, i, j;

	while ((c = *arg++)) {
		if (c == ' ' || c == '\t' || c == '\n')
			continue;
		/* Find the end of this word and terminate it in place. */
		for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++)
			;
		ep = p;
		if (*p)
			*p++ = 0;
		if (c == '-') {
			while ((c = *arg++)) {
				if (c == 'P') {
					if (*(uint8_t *)PTOV(0x496) & 0x10) {
						cp = "yes";
					} else {
						opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
						cp = "no";
					}
					printf("Keyboard: %s\n", cp);
					continue;
				} else if (c == 'S') {
					/*
					 * Accept the speed only if it is
					 * positive and the digits run up to
					 * the end of the word.
					 */
					j = 0;
					while ((unsigned int)(i = *arg++ - '0') <= 9)
						j = j * 10 + i;
					if (j > 0 && i == -'0') {
						comspeed = j;
						break;
					}
					/* Fall through to error below ('S' not in optstr[]). */
				}
				for (i = 0; c != optstr[i]; i++)
					if (i == NOPT - 1)
						return -1;
				opts ^= OPT_SET(flags[i]);
			}
			ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
			    OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
			if (ioctrl & IO_SERIAL) {
				if (sio_init(115200 / comspeed) != 0)
					ioctrl &= ~IO_SERIAL;
			}
		} else if (c == '?') {
			/*
			 * This is chained with "else" so that a word of
			 * options is not additionally run through the
			 * file name handling below.
			 */
			dnode_phys_t dn;

			if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
				zap_list(spa, &dn);
			}
			return -1;
		} else {
			arg--;

			/*
			 * Report pool status if the command is 'status'. Lets
			 * hope no-one wants to load /status as a kernel.
			 */
			if (!strcmp(arg, "status")) {
				spa_all_status();
				return -1;
			}

			/*
			 * If there is "zfs:" prefix simply ignore it.
			 */
			if (strncmp(arg, "zfs:", 4) == 0)
				arg += 4;

			/*
			 * If there is a colon, switch pools.
			 */
			q = strchr(arg, ':');
			if (q) {
				*q++ = '\0';
				if (zfs_mount_ds(arg) != 0)
					return -1;
				arg = q;
			}
			/* Whatever is left of the word is the file name. */
			if ((i = ep - arg)) {
				if ((size_t)i >= sizeof(kname))
					return -1;
				memcpy(kname, arg, i + 1);
			}
		}
		arg = p;
	}
	return 0;
}