kboot: aarch64 trampoline implementation
Update exec.c (copied from efi/loader/arch/arm64/exec.c) to allow execution of aarch64 kernels. This includes new trampoline code that handles copying the UEFI memory map, if available, from the PA provided in the Linux FDT. This is a complete implementation now, able to boot from the LinuxBoot environment on an aarch64 server that only offers LinuxBoot (though a workaround for the gicv3 inability to re-init is not yet in FreeBSD). Many 'fit and finish' issues will be addressed in subsequent commits. Sponsored by: Netflix Reviewed by: tsoome, kevans, andrew Differential Revision: https://reviews.freebsd.org/D38258
This commit is contained in:
parent
0928550c3e
commit
dfcca21075
@ -34,16 +34,17 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/linker.h>
|
||||
#include <machine/elf.h>
|
||||
|
||||
#include <bootstrap.h>
|
||||
|
||||
#ifdef EFI
|
||||
#include <efi.h>
|
||||
#include <efilib.h>
|
||||
|
||||
#include "loader_efi.h"
|
||||
|
||||
#else
|
||||
#include "host_syscall.h"
|
||||
#endif
|
||||
#include <machine/metadata.h>
|
||||
|
||||
#include "bootstrap.h"
|
||||
#include "kboot.h"
|
||||
#include "bootstrap.h"
|
||||
|
||||
#include "platform/acfreebsd.h"
|
||||
@ -54,6 +55,10 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include "cache.h"
|
||||
|
||||
#ifndef EFI
|
||||
#define LOADER_PAGE_SIZE PAGE_SIZE
|
||||
#endif
|
||||
|
||||
#ifdef EFI
|
||||
static EFI_GUID acpi_guid = ACPI_TABLE_GUID;
|
||||
static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID;
|
||||
@ -62,13 +67,14 @@ static EFI_GUID acpi20_guid = ACPI_20_TABLE_GUID;
|
||||
static int elf64_exec(struct preloaded_file *amp);
|
||||
static int elf64_obj_exec(struct preloaded_file *amp);
|
||||
|
||||
/* Stub out temporarily */
|
||||
static int
|
||||
bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp,
|
||||
bool exit_bs)
|
||||
{
|
||||
return EINVAL;
|
||||
}
|
||||
bool do_mem_map = false;
|
||||
|
||||
extern uint32_t efi_map_size;
|
||||
extern vm_paddr_t efi_map_phys_src; /* From DTB */
|
||||
extern vm_paddr_t efi_map_phys_dst; /* From our memory map metadata module */
|
||||
|
||||
int bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp,
|
||||
bool exit_bs);
|
||||
|
||||
static struct file_format arm64_elf = {
|
||||
elf64_loadfile,
|
||||
@ -80,21 +86,47 @@ struct file_format *file_formats[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
#ifndef EFI
|
||||
extern uintptr_t tramp;
|
||||
extern uint32_t tramp_size;
|
||||
extern uint32_t tramp_data_offset;
|
||||
|
||||
/*
 * Arguments handed to the assembly trampoline.  elf64_exec() fills this in
 * at tramp_data_offset inside its copy of the trampoline.  The number in
 * each field comment is the field's byte offset; it has to stay in step with
 * the TRAMP_* offsets the trampoline assembly uses to load these values.
 */
struct trampoline_data {
|
||||
uint64_t entry; // 0 (PA where kernel loaded)
|
||||
uint64_t modulep; // 8 module metadata
|
||||
uint64_t memmap_src; // 16 Linux-provided memory map PA
|
||||
uint64_t memmap_dst; // 24 Module data copy PA
|
||||
uint64_t memmap_len; // 32 Length to copy
|
||||
};
|
||||
#endif
|
||||
|
||||
extern vm_offset_t kboot_get_phys_load_segment(void);
|
||||
|
||||
static int
|
||||
elf64_exec(struct preloaded_file *fp)
|
||||
{
|
||||
vm_offset_t modulep, kernendp;
|
||||
vm_offset_t clean_addr;
|
||||
size_t clean_size;
|
||||
struct file_metadata *md;
|
||||
Elf_Ehdr *ehdr;
|
||||
#ifdef EFI
|
||||
vm_offset_t clean_addr;
|
||||
size_t clean_size;
|
||||
void (*entry)(vm_offset_t);
|
||||
int err;
|
||||
#else
|
||||
vm_offset_t trampolinebase;
|
||||
vm_offset_t staging;
|
||||
void *trampcode;
|
||||
uint64_t *trampoline;
|
||||
struct trampoline_data *trampoline_data;
|
||||
int nseg;
|
||||
void *kseg;
|
||||
#endif
|
||||
struct file_metadata *md;
|
||||
Elf_Ehdr *ehdr;
|
||||
int error;
|
||||
#ifdef EFI
|
||||
ACPI_TABLE_RSDP *rsdp;
|
||||
char buf[24];
|
||||
int revision;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Report the RSDP to the kernel. The old code used the 'hints' method
|
||||
* to communicate this to the kernel. However, while convenient, the
|
||||
@ -103,7 +135,6 @@ elf64_exec(struct preloaded_file *fp)
|
||||
* that start with acpi. The old 'hints' can be removed before we branch
|
||||
* for FreeBSD 15.
|
||||
*/
|
||||
#ifdef EFI
|
||||
rsdp = efi_get_table(&acpi20_guid);
|
||||
if (rsdp == NULL) {
|
||||
rsdp = efi_get_table(&acpi_guid);
|
||||
@ -137,6 +168,46 @@ elf64_exec(struct preloaded_file *fp)
|
||||
}
|
||||
}
|
||||
#else
|
||||
vm_offset_t rsdp;
|
||||
rsdp = acpi_rsdp();
|
||||
if (rsdp != 0) {
|
||||
char buf[24];
|
||||
|
||||
printf("Found ACPI 2.0 at %#016lx\n", rsdp);
|
||||
sprintf(buf, "0x%016llx", (unsigned long long)rsdp);
|
||||
setenv("hint.acpi.0.rsdp", buf, 1); /* For 13.1R bootability */
|
||||
setenv("acpi.rsdp", buf, 1);
|
||||
/* Nobody uses the rest of that stuff */
|
||||
}
|
||||
|
||||
|
||||
// XXX Question: why not just use malloc?
|
||||
trampcode = host_getmem(LOADER_PAGE_SIZE);
|
||||
if (trampcode == NULL) {
|
||||
printf("Unable to allocate trampoline\n");
|
||||
return (ENOMEM);
|
||||
}
|
||||
bzero((void *)trampcode, LOADER_PAGE_SIZE);
|
||||
bcopy((void *)&tramp, (void *)trampcode, tramp_size);
|
||||
trampoline = (void *)trampcode;
|
||||
|
||||
/*
|
||||
* Figure out where to put it.
|
||||
*
|
||||
* Linux does not allow us to kexec_load into any part of memory. Ask
|
||||
* arch_loadaddr to resolve the first available chunk of physical memory
|
||||
* where loading is possible (load_addr).
|
||||
*
|
||||
* The kernel is loaded at the 'base' address in contiguous physical
|
||||
* memory. We use the 2MB in front of the kernel as a place to put our
|
||||
* trampoline, but that's really overkill since we only need ~100 bytes.
|
||||
* The arm64 kernel's entry requirements are only 'load the kernel at a
|
||||
* 2MB alignment' and it figures out the rest, creates the right page
|
||||
* tables, etc.
|
||||
*/
|
||||
staging = kboot_get_phys_load_segment();
|
||||
printf("Load address at %#jx\n", (uintmax_t)staging);
|
||||
printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset);
|
||||
#endif
|
||||
|
||||
if ((md = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL)
|
||||
@ -147,33 +218,72 @@ elf64_exec(struct preloaded_file *fp)
|
||||
entry = efi_translate(ehdr->e_entry);
|
||||
|
||||
efi_time_fini();
|
||||
#else
|
||||
entry = (void *)ehdr->e_entry;
|
||||
#endif
|
||||
err = bi_load(fp->f_args, &modulep, &kernendp, true);
|
||||
if (err != 0) {
|
||||
error = bi_load(fp->f_args, &modulep, &kernendp, true);
|
||||
if (error != 0) {
|
||||
#ifdef EFI
|
||||
efi_time_init();
|
||||
#endif
|
||||
return (err);
|
||||
return (error);
|
||||
}
|
||||
|
||||
dev_cleanup();
|
||||
|
||||
/* Clean D-cache under kernel area and invalidate whole I-cache */
|
||||
#ifdef EFI
|
||||
/* Clean D-cache under kernel area and invalidate whole I-cache */
|
||||
clean_addr = (vm_offset_t)efi_translate(fp->f_addr);
|
||||
clean_size = (vm_offset_t)efi_translate(kernendp) - clean_addr;
|
||||
#else
|
||||
clean_addr = (vm_offset_t)fp->f_addr;
|
||||
clean_size = (vm_offset_t)kernendp - clean_addr;
|
||||
#endif
|
||||
|
||||
cpu_flush_dcache((void *)clean_addr, clean_size);
|
||||
cpu_inval_icache();
|
||||
|
||||
(*entry)(modulep);
|
||||
|
||||
#else
|
||||
/* Linux will flush the caches, just pass this data into our trampoline and go */
|
||||
trampoline_data = (void *)trampoline + tramp_data_offset;
|
||||
memset(trampoline_data, 0, sizeof(*trampoline_data));
|
||||
trampoline_data->entry = ehdr->e_entry - fp->f_addr + staging;
|
||||
trampoline_data->modulep = modulep;
|
||||
printf("Modulep = %jx\n", (uintmax_t)modulep);
|
||||
if (efi_map_phys_src != 0) {
|
||||
md = file_findmetadata(fp, MODINFOMD_EFI_MAP);
|
||||
if (md == NULL || md->md_addr == 0) {
|
||||
printf("Need to copy EFI MAP, but EFI MAP not found. %p\n", md);
|
||||
} else {
|
||||
printf("Metadata EFI map loaded at VA %lx\n", md->md_addr);
|
||||
efi_map_phys_dst = md->md_addr + staging +
|
||||
roundup2(sizeof(struct efi_map_header), 16) - fp->f_addr;
|
||||
trampoline_data->memmap_src = efi_map_phys_src;
|
||||
trampoline_data->memmap_dst = efi_map_phys_dst;
|
||||
trampoline_data->memmap_len = efi_map_size - roundup2(sizeof(struct efi_map_header), 16);
|
||||
printf("Copying UEFI Memory Map data from %#lx to %#lx %ld bytes\n",
|
||||
efi_map_phys_src,
|
||||
trampoline_data->memmap_dst,
|
||||
trampoline_data->memmap_len);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copy the trampoline to the ksegs. Since we're just bouncing off of
|
||||
* this into the kernel, no need to preserve the pages. On arm64, the
|
||||
* kernel sets up the initial page table, so we don't have to preserve
|
||||
* the memory used for the trampoline past when it calls the kernel.
|
||||
*/
|
||||
printf("kernendp = %#llx\n", (long long)kernendp);
|
||||
trampolinebase = staging + (kernendp - fp->f_addr);
|
||||
printf("trampolinebase = %#llx\n", (long long)trampolinebase);
|
||||
archsw.arch_copyin((void *)trampcode, kernendp, tramp_size);
|
||||
printf("Trampoline bouncing to %#llx\n", (long long)trampoline_data->entry);
|
||||
|
||||
if (archsw.arch_kexec_kseg_get == NULL)
|
||||
panic("architecture did not provide kexec segment mapping");
|
||||
archsw.arch_kexec_kseg_get(&nseg, &kseg);
|
||||
error = host_kexec_load(trampolinebase, nseg, kseg, HOST_KEXEC_ARCH_AARCH64);
|
||||
if (error != 0)
|
||||
panic("kexec_load returned error: %d", error);
|
||||
host_reboot(HOST_REBOOT_MAGIC1, HOST_REBOOT_MAGIC2, HOST_REBOOT_CMD_KEXEC, 0);
|
||||
#endif
|
||||
|
||||
panic("exec returned");
|
||||
}
|
||||
|
||||
|
@ -21,47 +21,81 @@
|
||||
* struct trampoline_data {
|
||||
* uint64_t entry; // 0 (PA where kernel loaded)
|
||||
* uint64_t modulep; // 8 module metadata
|
||||
* uint64_t memmap_src; // 16 Linux-provided memory map PA
|
||||
* uint64_t memmap_dst; // 24 Module data copy PA
|
||||
* uint64_t memmap_len; // 32 Length to copy
|
||||
* };
|
||||
*
|
||||
* The aarch64 _start routine assumes:
|
||||
* FreeBSD's arm64 entry point is _start which assumes:
|
||||
* MMU on with an identity map, or off
|
||||
* D-Cache: off
|
||||
* I-Cache: on or off
|
||||
* We are loaded at a 2MiB aligned address
|
||||
* Module data (modulep) pointer in x0
|
||||
*
|
||||
* Unlike EFI, we don't support copying the staging area. We tell Linux to land
|
||||
* the kernel in its final location with the needed alignment, etc.
|
||||
* The rest of the boot loader tells Linux to land the kernel in its final
|
||||
* location with the needed alignment, etc. It does this, and then we take over.
|
||||
*
|
||||
* This trampoline sets up the arguments the kernel expects, flushes
|
||||
* the cache lines and jumps to the kernel _start address. We pass the modulep
|
||||
* pointer in x0, as _start expects.
|
||||
* The Linux kernel will helpfully turn off the MMU, flush the caches, disable
|
||||
* them, etc. It calls the tramp with two args: FDT blob address in x0 and the
|
||||
* EL2 vectors in x1. Currently, we make use of neither of these parameters: we
|
||||
* pass whatever dtb we think we need as part of the module data and we're a bit
|
||||
* weak on hypervisor support at the moment. _start's requirements are all
|
||||
* satisfied.
|
||||
*
|
||||
* This trampoline sets up the arguments the kernel expects and jumps to the
|
||||
* kernel _start address. We pass the modulep pointer in x0, as _start
|
||||
* expects. We assume that the various cache flushing, invalidation, etc that
|
||||
* linux did during or after copying the data down is sufficient, though we may
|
||||
* need to be mindful of cache flushing if we run in EL2 (TBD).
|
||||
*
|
||||
* Note, if TRAMP_MEMMAP_SRC is non-zero, then we have to copy the Linux
|
||||
* provided UEFI memory map. It's easier to do that here. In kboot we couldn't
|
||||
* access the physical memory, and it's a chicken and egg problem later in the
|
||||
* kernel.
|
||||
*/
|
||||
|
||||
#define TRAMP_ENTRY 0
|
||||
#define TRAMP_MODULEP 8
|
||||
#define TRAMP_MEMMAP_SRC 16
|
||||
#define TRAMP_MEMMAP_DST 24
|
||||
#define TRAMP_MEMMAP_LEN 32
|
||||
#define TRAMP_TOTAL 40
|
||||
|
||||
.text
|
||||
.globl aarch64_tramp
|
||||
aarch64_tramp:
|
||||
b 1f /* skip over our saved args */
|
||||
.p2align 3
|
||||
trampoline_data:
|
||||
#define TRAMP_ENTRY 0
|
||||
#define TRAMP_MODULEP 8
|
||||
#define TRAMP_TOTAL 16
|
||||
.space TRAMP_TOTAL
|
||||
#define TMPSTACKSIZE 48 /* 16 bytes for args +8 for pushq/popfq + 24 spare */
|
||||
.globl tramp
|
||||
tramp:
|
||||
adr x8, trampoline_data
|
||||
ldr x10, [x8, #TRAMP_MEMMAP_SRC]
|
||||
cmp x10, xzr
|
||||
b.eq 9f
|
||||
|
||||
/*
|
||||
* Copy over the memory map into the area we have reserved for it. Assume
|
||||
* the copy is a multiple of 8, since we know table entries are made up
|
||||
* of several 64-bit quantities.
|
||||
*/
|
||||
ldp x11, x12, [x8, #TRAMP_MEMMAP_DST] /* x12 = len */
|
||||
1:
|
||||
adr x2, trampoline_data
|
||||
ldr x1, [x2, #TRAMP_ENTRY]
|
||||
ldr x0, [x2, #TRAMP_MODULEP]
|
||||
br x1
|
||||
ldr x13, [x10], #8
|
||||
str x13, [x11], #8
|
||||
subs x12, x12, #8
|
||||
b.hi 1b
|
||||
9:
|
||||
ldp x9, x0, [x8, #TRAMP_ENTRY] /* x0 = modulep */
|
||||
br x9
|
||||
|
||||
.p2align 4
|
||||
trampoline_data:
|
||||
.space TRAMP_TOTAL
|
||||
#define TMPSTACKSIZE 48 /* 16 bytes for args +8 for pushq/popfq + 24 spare */
|
||||
.space TMPSTACKSIZE
|
||||
aarch64_tramp_end: /* padding doubles as stack */
|
||||
tramp_end: /* padding doubles as stack */
|
||||
|
||||
.data
|
||||
.globl aarch64_tramp_size
|
||||
aarch64_tramp_size:
|
||||
.long aarch64_tramp_end-aarch64_tramp
|
||||
.globl aarch64_tramp_data_offset
|
||||
aarch64_tramp_data_offset:
|
||||
.long trampoline_data-aarch64_tramp
|
||||
.globl tramp_size
|
||||
tramp_size:
|
||||
.long tramp_end-tramp
|
||||
.globl tramp_data_offset
|
||||
tramp_data_offset:
|
||||
.long trampoline_data-tramp
|
||||
|
Loading…
Reference in New Issue
Block a user