From 8de1ad0b9b76017772d387249c79f58e81672648 Mon Sep 17 00:00:00 2001 From: Wojciech Macek Date: Mon, 29 Jan 2018 09:24:28 +0000 Subject: [PATCH] loader: support for mixed-endianness ELF/loader and POWER8 On POWER8 with current petitboot, the loader.kboot might be run as a little-endian application. The FreeBSD kernel is always big-endian, so the load_elf_* routines must be aware of proper endianness of all fields. Submitted by: Wojciech Macek Obtained from: Semihalf Sponsored by: IBM, QCM Technologies Differential revision: https://reviews.freebsd.org/D12422 --- stand/common/bootstrap.h | 3 + stand/common/load_elf.c | 205 +++++++++++++++++++++++- stand/powerpc/kboot/conf.c | 1 + stand/powerpc/kboot/host_syscall.S | 11 +- stand/powerpc/kboot/host_syscall.h | 13 +- stand/powerpc/kboot/hostdisk.c | 8 +- stand/powerpc/kboot/kerneltramp.S | 55 ++++++- stand/powerpc/kboot/main.c | 200 ++++++++++++++++++++++- stand/powerpc/kboot/metadata.c | 23 +-- stand/powerpc/kboot/ppc64_elf_freebsd.c | 81 +++++++--- 10 files changed, 549 insertions(+), 51 deletions(-) diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h index a570720b4250..4672a6c28483 100644 --- a/stand/common/bootstrap.h +++ b/stand/common/bootstrap.h @@ -315,6 +315,9 @@ struct arch_switch /* Probe ZFS pool(s), if needed. */ void (*arch_zfs_probe)(void); + + /* For kexec-type loaders, get ksegment structure */ + void (*arch_kexec_kseg_get)(int *nseg, void **kseg); }; extern struct arch_switch archsw; diff --git a/stand/common/load_elf.c b/stand/common/load_elf.c index 4c1277b71c83..69a5e9ae3dfd 100644 --- a/stand/common/load_elf.c +++ b/stand/common/load_elf.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -118,11 +119,68 @@ __elfN(load_elf_header)(char *filename, elf_file_t ef) err = EFTYPE; goto error; } + if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || /* Layout ?
*/ ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || - ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */ - ehdr->e_version != EV_CURRENT || - ehdr->e_machine != ELF_TARG_MACH) { /* Machine ? */ + ehdr->e_ident[EI_VERSION] != EV_CURRENT) /* Version ? */ { + err = EFTYPE; + goto error; + } + + /* + * Fixup ELF endianness. + * + * The Xhdr structure was loaded using block read call to + * optimize file accesses. It might happen, that the endianness + * of the system memory is different that endianness of + * the ELF header. + * Swap fields here to guarantee that Xhdr always contain + * valid data regardless of architecture. + */ + if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { + ehdr->e_type = be16toh(ehdr->e_type); + ehdr->e_machine = be16toh(ehdr->e_machine); + ehdr->e_version = be32toh(ehdr->e_version); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + ehdr->e_entry = be64toh(ehdr->e_entry); + ehdr->e_phoff = be64toh(ehdr->e_phoff); + ehdr->e_shoff = be64toh(ehdr->e_shoff); + } else { + ehdr->e_entry = be32toh(ehdr->e_entry); + ehdr->e_phoff = be32toh(ehdr->e_phoff); + ehdr->e_shoff = be32toh(ehdr->e_shoff); + } + ehdr->e_flags = be32toh(ehdr->e_flags); + ehdr->e_ehsize = be16toh(ehdr->e_ehsize); + ehdr->e_phentsize = be16toh(ehdr->e_phentsize); + ehdr->e_phnum = be16toh(ehdr->e_phnum); + ehdr->e_shentsize = be16toh(ehdr->e_shentsize); + ehdr->e_shnum = be16toh(ehdr->e_shnum); + ehdr->e_shstrndx = be16toh(ehdr->e_shstrndx); + + } else { + ehdr->e_type = le16toh(ehdr->e_type); + ehdr->e_machine = le16toh(ehdr->e_machine); + ehdr->e_version = le32toh(ehdr->e_version); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + ehdr->e_entry = le64toh(ehdr->e_entry); + ehdr->e_phoff = le64toh(ehdr->e_phoff); + ehdr->e_shoff = le64toh(ehdr->e_shoff); + } else { + ehdr->e_entry = le32toh(ehdr->e_entry); + ehdr->e_phoff = le32toh(ehdr->e_phoff); + ehdr->e_shoff = le32toh(ehdr->e_shoff); + } + ehdr->e_flags = le32toh(ehdr->e_flags); + ehdr->e_ehsize = le16toh(ehdr->e_ehsize); + 
ehdr->e_phentsize = le16toh(ehdr->e_phentsize); + ehdr->e_phnum = le16toh(ehdr->e_phnum); + ehdr->e_shentsize = le16toh(ehdr->e_shentsize); + ehdr->e_shnum = le16toh(ehdr->e_shnum); + ehdr->e_shstrndx = le16toh(ehdr->e_shstrndx); + } + + if (ehdr->e_version != EV_CURRENT || ehdr->e_machine != ELF_TARG_MACH) { /* Machine ? */ err = EFTYPE; goto error; } @@ -317,6 +375,15 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) u_int fpcopy; Elf_Sym sym; Elf_Addr p_start, p_end; +#if __ELF_WORD_SIZE == 64 + uint64_t scr_ssym; + uint64_t scr_esym; + uint64_t scr; +#else + uint32_t scr_ssym; + uint32_t scr_esym; + uint32_t scr; +#endif dp = NULL; shdr = NULL; @@ -391,6 +458,54 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) phdr = (Elf_Phdr *)(ef->firstpage + ehdr->e_phoff); for (i = 0; i < ehdr->e_phnum; i++) { + /* + * Fixup ELF endianness. + * + * The Xhdr structure was loaded using block read call to + * optimize file accesses. It might happen, that the endianness + * of the system memory is different that endianness of + * the ELF header. + * Swap fields here to guarantee that Xhdr always contain + * valid data regardless of architecture. 
+ */ + if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { + phdr[i].p_type = be32toh(phdr[i].p_type); + phdr[i].p_flags = be32toh(phdr[i].p_flags); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + phdr[i].p_offset = be64toh(phdr[i].p_offset); + phdr[i].p_vaddr = be64toh(phdr[i].p_vaddr); + phdr[i].p_paddr = be64toh(phdr[i].p_paddr); + phdr[i].p_filesz = be64toh(phdr[i].p_filesz); + phdr[i].p_memsz = be64toh(phdr[i].p_memsz); + phdr[i].p_align = be64toh(phdr[i].p_align); + } else { + phdr[i].p_offset = be32toh(phdr[i].p_offset); + phdr[i].p_vaddr = be32toh(phdr[i].p_vaddr); + phdr[i].p_paddr = be32toh(phdr[i].p_paddr); + phdr[i].p_filesz = be32toh(phdr[i].p_filesz); + phdr[i].p_memsz = be32toh(phdr[i].p_memsz); + phdr[i].p_align = be32toh(phdr[i].p_align); + } + } else { + phdr[i].p_type = le32toh(phdr[i].p_type); + phdr[i].p_flags = le32toh(phdr[i].p_flags); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + phdr[i].p_offset = le64toh(phdr[i].p_offset); + phdr[i].p_vaddr = le64toh(phdr[i].p_vaddr); + phdr[i].p_paddr = le64toh(phdr[i].p_paddr); + phdr[i].p_filesz = le64toh(phdr[i].p_filesz); + phdr[i].p_memsz = le64toh(phdr[i].p_memsz); + phdr[i].p_align = le64toh(phdr[i].p_align); + } else { + phdr[i].p_offset = le32toh(phdr[i].p_offset); + phdr[i].p_vaddr = le32toh(phdr[i].p_vaddr); + phdr[i].p_paddr = le32toh(phdr[i].p_paddr); + phdr[i].p_filesz = le32toh(phdr[i].p_filesz); + phdr[i].p_memsz = le32toh(phdr[i].p_memsz); + phdr[i].p_align = le32toh(phdr[i].p_align); + } + } + /* We want to load PT_LOAD segments only.. */ if (phdr[i].p_type != PT_LOAD) continue; @@ -465,6 +580,60 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) "_loadimage: failed to read section headers"); goto nosyms; } + + /* + * Fixup ELF endianness. + * + * The Xhdr structure was loaded using block read call to + * optimize file accesses. It might happen, that the endianness + * of the system memory is different that endianness of + * the ELF header. 
+ * Swap fields here to guarantee that Xhdr always contain + * valid data regardless of architecture. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) { + shdr[i].sh_name = be32toh(shdr[i].sh_name); + shdr[i].sh_type = be32toh(shdr[i].sh_type); + shdr[i].sh_link = be32toh(shdr[i].sh_link); + shdr[i].sh_info = be32toh(shdr[i].sh_info); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + shdr[i].sh_flags = be64toh(shdr[i].sh_flags); + shdr[i].sh_addr = be64toh(shdr[i].sh_addr); + shdr[i].sh_offset = be64toh(shdr[i].sh_offset); + shdr[i].sh_size = be64toh(shdr[i].sh_size); + shdr[i].sh_addralign = be64toh(shdr[i].sh_addralign); + shdr[i].sh_entsize = be64toh(shdr[i].sh_entsize); + } else { + shdr[i].sh_flags = be32toh(shdr[i].sh_flags); + shdr[i].sh_addr = be32toh(shdr[i].sh_addr); + shdr[i].sh_offset = be32toh(shdr[i].sh_offset); + shdr[i].sh_size = be32toh(shdr[i].sh_size); + shdr[i].sh_addralign = be32toh(shdr[i].sh_addralign); + shdr[i].sh_entsize = be32toh(shdr[i].sh_entsize); + } + } else { + shdr[i].sh_name = le32toh(shdr[i].sh_name); + shdr[i].sh_type = le32toh(shdr[i].sh_type); + shdr[i].sh_link = le32toh(shdr[i].sh_link); + shdr[i].sh_info = le32toh(shdr[i].sh_info); + if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) { + shdr[i].sh_flags = le64toh(shdr[i].sh_flags); + shdr[i].sh_addr = le64toh(shdr[i].sh_addr); + shdr[i].sh_offset = le64toh(shdr[i].sh_offset); + shdr[i].sh_size = le64toh(shdr[i].sh_size); + shdr[i].sh_addralign = le64toh(shdr[i].sh_addralign); + shdr[i].sh_entsize = le64toh(shdr[i].sh_entsize); + } else { + shdr[i].sh_flags = le32toh(shdr[i].sh_flags); + shdr[i].sh_addr = le32toh(shdr[i].sh_addr); + shdr[i].sh_offset = le32toh(shdr[i].sh_offset); + shdr[i].sh_size = le32toh(shdr[i].sh_size); + shdr[i].sh_addralign = le32toh(shdr[i].sh_addralign); + shdr[i].sh_entsize = le32toh(shdr[i].sh_entsize); + } + } + } file_addmetadata(fp, MODINFOMD_SHDR, chunk, shdr); /* @@ -540,10 +709,16 @@ __elfN(loadimage)(struct 
preloaded_file *fp, elf_file_t ef, u_int64_t off) break; } #endif - size = shdr[i].sh_size; - archsw.arch_copyin(&size, lastaddr, sizeof(size)); - lastaddr += sizeof(size); +#if !defined(__powerpc__) + scr = size; /* no byte swap needed off PowerPC */ +#elif __ELF_WORD_SIZE == 64 + scr = htobe64(size); +#else + scr = htobe32(size); +#endif + archsw.arch_copyin(&scr, lastaddr, sizeof(scr)); + lastaddr += sizeof(scr); #ifdef ELF_VERBOSE printf("\n%s: 0x%jx@0x%jx -> 0x%jx-0x%jx", secname, @@ -582,8 +757,22 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_t ef, u_int64_t off) printf("]"); #endif - file_addmetadata(fp, MODINFOMD_SSYM, sizeof(ssym), &ssym); - file_addmetadata(fp, MODINFOMD_ESYM, sizeof(esym), &esym); +#if defined(__powerpc__) + /* On PowerPC we always need to provide BE data to the kernel */ + #if __ELF_WORD_SIZE == 64 + scr_ssym = htobe64((uint64_t)ssym); + scr_esym = htobe64((uint64_t)esym); + #else + scr_ssym = htobe32((uint32_t)ssym); + scr_esym = htobe32((uint32_t)esym); + #endif +#else + scr_ssym = ssym; + scr_esym = esym; +#endif + + file_addmetadata(fp, MODINFOMD_SSYM, sizeof(scr_ssym), &scr_ssym); + file_addmetadata(fp, MODINFOMD_ESYM, sizeof(scr_esym), &scr_esym); nosyms: printf("\n"); diff --git a/stand/powerpc/kboot/conf.c b/stand/powerpc/kboot/conf.c index 104dd93ca2fb..22562e898ef1 100644 --- a/stand/powerpc/kboot/conf.c +++ b/stand/powerpc/kboot/conf.c @@ -78,6 +78,7 @@ struct fs_ops *file_system[] = { #if defined(LOADER_BZIP2_SUPPORT) &bzipfs_fsops, #endif + &dosfs_fsops, NULL }; diff --git a/stand/powerpc/kboot/host_syscall.S b/stand/powerpc/kboot/host_syscall.S index 3607fdb0d2a1..1cb9016715ac 100644 --- a/stand/powerpc/kboot/host_syscall.S +++ b/stand/powerpc/kboot/host_syscall.S @@ -14,7 +14,6 @@ ENTRY(host_read) li %r3, 0 blr - ENTRY(host_write) li %r0, 4 # SYS_write sc @@ -28,6 +27,11 @@ ENTRY(host_seek) sc blr +ENTRY(host_llseek) + li %r0, 140 # SYS_llseek + sc + blr + ENTRY(host_open) li %r0, 5 # SYS_open sc @@ -47,6 +51,11 @@ ENTRY(host_mmap) sc blr
+ENTRY(host_uname) + li %r0, 122 # SYS_uname + sc + blr + ENTRY(host_gettimeofday) li %r0, 78 # SYS_gettimeofday sc diff --git a/stand/powerpc/kboot/host_syscall.h b/stand/powerpc/kboot/host_syscall.h index 349c54e586a6..50b0725be675 100644 --- a/stand/powerpc/kboot/host_syscall.h +++ b/stand/powerpc/kboot/host_syscall.h @@ -34,9 +34,18 @@ ssize_t host_read(int fd, void *buf, size_t nbyte); ssize_t host_write(int fd, const void *buf, size_t nbyte); ssize_t host_seek(int fd, int64_t offset, int whence); int host_open(const char *path, int flags, int mode); +ssize_t host_llseek(int fd, int32_t offset_high, int32_t offset_lo, uint64_t *result, int whence); int host_close(int fd); void *host_mmap(void *addr, size_t len, int prot, int flags, int fd, int); #define host_getmem(size) host_mmap(0, size, 3 /* RW */, 0x22 /* ANON */, -1, 0); +struct old_utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; +}; +int host_uname(struct old_utsname *); struct host_timeval { int tv_sec; int tv_usec; @@ -44,8 +53,8 @@ struct host_timeval { int host_gettimeofday(struct host_timeval *a, void *b); int host_select(int nfds, long *readfds, long *writefds, long *exceptfds, struct host_timeval *timeout); -int kexec_load(vm_offset_t start, int nsegs, void *segs); -int host_reboot(int, int, int, void *); +int kexec_load(uint32_t start, int nsegs, uint32_t segs); +int host_reboot(int, int, int, uint32_t); int host_getdents(int fd, void *dirp, int count); #endif diff --git a/stand/powerpc/kboot/hostdisk.c b/stand/powerpc/kboot/hostdisk.c index 26d39596db66..3c36f2d3bce6 100644 --- a/stand/powerpc/kboot/hostdisk.c +++ b/stand/powerpc/kboot/hostdisk.c @@ -64,10 +64,14 @@ hostdisk_strategy(void *devdata, int flag, daddr_t dblk, size_t size, struct devdesc *desc = devdata; daddr_t pos; int n; - + uint64_t res; + uint32_t posl, posh; + pos = dblk * 512; - if (host_seek(desc->d_unit, pos, 0) < 0) { + posl = pos & 0xffffffff; + posh = (pos >> 
32) & 0xffffffff; + if (host_llseek(desc->d_unit, posh, posl, &res, 0) < 0) { printf("Seek error\n"); return (EIO); } diff --git a/stand/powerpc/kboot/kerneltramp.S b/stand/powerpc/kboot/kerneltramp.S index a394c9554f30..15fdfc26bd90 100644 --- a/stand/powerpc/kboot/kerneltramp.S +++ b/stand/powerpc/kboot/kerneltramp.S @@ -20,6 +20,18 @@ CNAME(kerneltramp): bl 2f .space 24 /* branch address, r3-r7 */ +/* + * MUST BE IN SYNC WITH: + * struct trampoline_data { + * uint32_t kernel_entry; + * uint32_t dtb; + * uint32_t phys_mem_offset; + * uint32_t of_entry; + * uint32_t mdp; + * uint32_t mdp_size; + * }; + */ + . = kerneltramp + 0x40 /* AP spinlock */ .long 0 @@ -36,18 +48,53 @@ CNAME(kerneltramp): sync ba 0x100 -2: /* Continuation of kerneltramp */ +2: /* Continuation of kerneltramp */ mflr %r8 mtlr %r9 - lwz %r3,0(%r8) - mtctr %r3 + + mfmsr %r10 + andi. %r10, %r10, 1 /* test MSR_LE */ + bne little_endian + +/* We're starting in BE */ +big_endian: lwz %r3,4(%r8) lwz %r4,8(%r8) lwz %r5,12(%r8) lwz %r6,16(%r8) lwz %r7,20(%r8) + + lwz %r10, 0(%r8) + mtctr %r10 bctr - + +/* We're starting in LE */ +little_endian: + + /* Entries are BE, swap them during load. 
*/ + li %r10, 4 + lwbrx %r3, %r8, %r10 + li %r10, 8 + lwbrx %r4, %r8, %r10 + li %r10, 12 + lwbrx %r5, %r8, %r10 + li %r10, 16 + lwbrx %r6, %r8, %r10 + li %r10, 20 + lwbrx %r7, %r8, %r10 + + /* Clear MSR_LE flag to enter the BE world */ + mfmsr %r10 + clrrdi %r10, %r10, 1 + mtsrr1 %r10 + + /* Entry is at 0(%r8) */ + li %r10, 0 + lwbrx %r10, %r8, %r10 + mtsrr0 %r10 + + rfid + endkerneltramp: .data diff --git a/stand/powerpc/kboot/main.c b/stand/powerpc/kboot/main.c index cbd11611ab66..d4f59ab1bcac 100644 --- a/stand/powerpc/kboot/main.c +++ b/stand/powerpc/kboot/main.c @@ -27,6 +27,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include @@ -35,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "bootstrap.h" #include "host_syscall.h" + struct arch_switch archsw; extern void *_end; @@ -47,9 +49,170 @@ ssize_t kboot_readin(const int fd, vm_offset_t dest, const size_t len); int kboot_autoload(void); uint64_t kboot_loadaddr(u_int type, void *data, uint64_t addr); int kboot_setcurrdev(struct env_var *ev, int flags, const void *value); +static void kboot_kseg_get(int *nseg, void **ptr); extern int command_fdt_internal(int argc, char *argv[]); +struct region_desc { + uint64_t start; + uint64_t end; +}; + +static uint64_t +kboot_get_phys_load_segment(void) +{ + int fd; + uint64_t entry[2]; + static uint64_t load_segment = ~(0UL); + uint64_t val_64; + uint32_t val_32; + struct region_desc rsvd_reg[32]; + int rsvd_reg_cnt = 0; + int ret, a, b; + uint64_t start, end; + + if (load_segment == ~(0UL)) { + + /* Default load address is 0x00000000 */ + load_segment = 0UL; + + /* Read reserved regions */ + fd = host_open("/proc/device-tree/reserved-ranges", O_RDONLY, 0); + if (fd >= 0) { + while (host_read(fd, &entry[0], sizeof(entry)) == sizeof(entry)) { + rsvd_reg[rsvd_reg_cnt].start = be64toh(entry[0]); + rsvd_reg[rsvd_reg_cnt].end = + be64toh(entry[1]) + rsvd_reg[rsvd_reg_cnt].start - 1; + rsvd_reg_cnt++; + } + host_close(fd); + } + /* Read where the kernel ends */ + fd = 
host_open("/proc/device-tree/chosen/linux,kernel-end", O_RDONLY, 0); + if (fd >= 0) { + ret = host_read(fd, &val_64, sizeof(val_64)); + + if (ret == sizeof(uint64_t)) { + rsvd_reg[rsvd_reg_cnt].start = 0; + rsvd_reg[rsvd_reg_cnt].end = be64toh(val_64) - 1; + } else { + memcpy(&val_32, &val_64, sizeof(val_32)); + rsvd_reg[rsvd_reg_cnt].start = 0; + rsvd_reg[rsvd_reg_cnt].end = be32toh(val_32) - 1; + } + rsvd_reg_cnt++; + + host_close(fd); + } + /* Read memory size (SOCKET0 only) */ + fd = host_open("/proc/device-tree/memory@0/reg", O_RDONLY, 0); + if (fd < 0) + fd = host_open("/proc/device-tree/memory/reg", O_RDONLY, 0); + if (fd >= 0) { + ret = host_read(fd, &entry, sizeof(entry)); + + /* Memory range in start:length format */ + entry[0] = be64toh(entry[0]); + entry[1] = be64toh(entry[1]); + + /* Reserve everything what is before start */ + if (entry[0] != 0) { + rsvd_reg[rsvd_reg_cnt].start = 0; + rsvd_reg[rsvd_reg_cnt].end = entry[0] - 1; + rsvd_reg_cnt++; + } + /* Reserve everything what is after end */ + if (entry[1] != 0xffffffffffffffffUL) { + rsvd_reg[rsvd_reg_cnt].start = entry[0] + entry[1]; + rsvd_reg[rsvd_reg_cnt].end = 0xffffffffffffffffUL; + rsvd_reg_cnt++; + } + + host_close(fd); + } + + /* Sort entries in ascending order (bubble) */ + for (a = rsvd_reg_cnt - 1; a > 0; a--) { + for (b = 0; b < a; b++) { + if (rsvd_reg[b].start > rsvd_reg[b + 1].start) { + struct region_desc tmp; + tmp = rsvd_reg[b]; + rsvd_reg[b] = rsvd_reg[b + 1]; + rsvd_reg[b + 1] = tmp; + } + } + } + + /* Join overlapping/adjacent regions */ + for (a = 0; a < rsvd_reg_cnt - 1; ) { + + if ((rsvd_reg[a + 1].start <= rsvd_reg[a].end) || + ((rsvd_reg[a + 1].start - 1) == rsvd_reg[a].end)) { + /* Overlapping/adjacent: absorb region a+1 into region a
*/ + rsvd_reg[a].end = + MAX(rsvd_reg[a].end, rsvd_reg[a + 1].end); + + for (b = a + 1; b < rsvd_reg_cnt - 1; b++) + rsvd_reg[b] = rsvd_reg[b + 1]; + rsvd_reg_cnt--; + } else + a++; + } + + /* Find the first free region */ + if (rsvd_reg_cnt > 0) { + start = 0; + end = rsvd_reg[0].start; + for (a = 0; a < rsvd_reg_cnt - 1; a++) { + if ((start >= rsvd_reg[a].start) && + (start <= rsvd_reg[a].end)) { + start = rsvd_reg[a].end + 1; + end = rsvd_reg[a + 1].start; + } else + break; + } + + if (start != end) { + uint64_t align = 64UL*1024UL*1024UL; + + /* Align both to 64MB boundary */ + start = (start + align - 1UL) & ~(align - 1UL); + end = ((end + 1UL) & ~(align - 1UL)) - 1UL; + + if (start < end) + load_segment = start; + } + } + } + + return (load_segment); +} + +uint8_t +kboot_get_kernel_machine_bits(void) +{ + static uint8_t bits = 0; + struct old_utsname utsname; + int ret; + + if (bits == 0) { + /* Default is 32-bit kernel */ + bits = 32; + + /* Try to get system type */ + memset(&utsname, 0, sizeof(utsname)); + ret = host_uname(&utsname); + if (ret == 0) { + if (strcmp(utsname.machine, "ppc64") == 0) + bits = 64; + else if (strcmp(utsname.machine, "ppc64le") == 0) + bits = 64; + } + } + + return (bits); +} + int kboot_getdev(void **vdev, const char *devspec, const char **path) { @@ -94,7 +257,7 @@ main(int argc, const char **argv) { void *heapbase; const size_t heapsize = 15*1024*1024; - const char *bootdev = argv[1]; + const char *bootdev; /* * Set the heap to one page after the end of the loader.
@@ -107,6 +270,12 @@ main(int argc, const char **argv) */ cons_probe(); + /* Choose bootdev if provided */ + if (argc > 1) + bootdev = argv[1]; + else + bootdev = ""; + printf("Boot device: %s\n", bootdev); archsw.arch_getdev = kboot_getdev; @@ -115,6 +284,7 @@ main(int argc, const char **argv) archsw.arch_readin = kboot_readin; archsw.arch_autoload = kboot_autoload; archsw.arch_loadaddr = kboot_loadaddr; + archsw.arch_kexec_kseg_get = kboot_kseg_get; printf("\n%s", bootprog_info); @@ -181,7 +351,7 @@ static ssize_t get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) { int i = 0; - const size_t segsize = 2*1024*1024; + const size_t segsize = 4*1024*1024; for (i = 0; i < nkexec_segments; i++) { if (dest >= (vm_offset_t)loaded_segments[i].mem && @@ -194,6 +364,7 @@ get_phys_buffer(vm_offset_t dest, const size_t len, void **buf) loaded_segments[nkexec_segments].bufsz = segsize; loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,segsize); loaded_segments[nkexec_segments].memsz = segsize; + i = nkexec_segments; nkexec_segments++; @@ -283,19 +454,34 @@ kboot_autoload(void) uint64_t kboot_loadaddr(u_int type, void *data, uint64_t addr) { - /* - * Need to stay out of the way of Linux. /chosen/linux,kernel-end does - * a better job here, but use a fixed offset for now. 
- */ if (type == LOAD_ELF) addr = roundup(addr, PAGE_SIZE); else - addr += 64*1024*1024; /* Stay out of the way of Linux */ + addr += kboot_get_phys_load_segment(); return (addr); } +static void +kboot_kseg_get(int *nseg, void **ptr) +{ +#if 0 + int a; + + for (a = 0; a < nkexec_segments; a++) { + printf("kseg_get: %jx %jx %jx %jx\n", + (uintmax_t)loaded_segments[a].buf, + (uintmax_t)loaded_segments[a].bufsz, + (uintmax_t)loaded_segments[a].mem, + (uintmax_t)loaded_segments[a].memsz); + } +#endif + + *nseg = nkexec_segments; + *ptr = &loaded_segments[0]; +} + void _start(int argc, const char **argv, char **env) { diff --git a/stand/powerpc/kboot/metadata.c b/stand/powerpc/kboot/metadata.c index 1e8c314c64e6..2892ce506590 100644 --- a/stand/powerpc/kboot/metadata.c +++ b/stand/powerpc/kboot/metadata.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -157,7 +158,7 @@ md_copyenv(vm_offset_t addr) static int align; #define COPY32(v, a, c) { \ - u_int32_t x = (v); \ + u_int32_t x = htobe32(v); \ if (c) \ archsw.arch_copyin(&x, a, sizeof(x)); \ a += sizeof(x); \ @@ -254,11 +255,12 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offset_t *dtb, int kern64) vm_offset_t fdtp; vm_offset_t size; uint64_t scratch64; + uint32_t scratch32; char *rootdevname; int howto; align = kern64 ? 
8 : 4; - howto = md_getboothowto(args); + howto = htobe32(md_getboothowto(args)); /* * Allow the environment variable 'rootdev' to override the supplied device @@ -300,16 +302,19 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offset_t *dtb, int kern64) panic("can't find kernel file"); file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto); if (kern64) { - scratch64 = envp; + scratch64 = htobe64(envp); file_addmetadata(kfp, MODINFOMD_ENVP, sizeof scratch64, &scratch64); - scratch64 = fdtp; + scratch64 = htobe64(fdtp); file_addmetadata(kfp, MODINFOMD_DTBP, sizeof scratch64, &scratch64); - scratch64 = kernend; + scratch64 = htobe64(kernend); file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof scratch64, &scratch64); } else { - file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp); - file_addmetadata(kfp, MODINFOMD_DTBP, sizeof fdtp, &fdtp); - file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend); + scratch32 = htobe32(envp); + file_addmetadata(kfp, MODINFOMD_ENVP, sizeof scratch32, &scratch32); + scratch32 = htobe32(fdtp); + file_addmetadata(kfp, MODINFOMD_DTBP, sizeof scratch32, &scratch32); + scratch32 = htobe32(kernend); + file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof scratch32, &scratch32); } *modulep = addr; @@ -318,7 +323,7 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offset_t *dtb, int kern64) md = file_findmetadata(kfp, MODINFOMD_KERNEND); if (kern64) { - scratch64 = kernend; + scratch64 = htobe64(kernend); bcopy(&scratch64, md->md_data, sizeof scratch64); } else { bcopy(&kernend, md->md_data, sizeof kernend); diff --git a/stand/powerpc/kboot/ppc64_elf_freebsd.c b/stand/powerpc/kboot/ppc64_elf_freebsd.c index 987565eaa2bd..954707716744 100644 --- a/stand/powerpc/kboot/ppc64_elf_freebsd.c +++ b/stand/powerpc/kboot/ppc64_elf_freebsd.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 64 #include +#include #include #include @@ -43,8 +44,15 @@ __FBSDID("$FreeBSD$"); extern char end[]; extern void *kerneltramp; 
extern size_t szkerneltramp; -extern int nkexec_segments; -extern void * loaded_segments; + +struct trampoline_data { + uint32_t kernel_entry; + uint32_t dtb; + uint32_t phys_mem_offset; + uint32_t of_entry; + uint32_t mdp; + uint32_t mdp_size; +}; vm_offset_t md_load64(char *args, vm_offset_t *modulep, vm_offset_t *dtb); @@ -70,53 +78,90 @@ ppc64_elf_exec(struct preloaded_file *fp) int error; uint32_t *trampoline; uint64_t entry; - vm_offset_t trampolinebase; + uint64_t trampolinebase; + struct trampoline_data *trampoline_data; + int nseg; + void *kseg; if ((fmp = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) { return(EFTYPE); } e = (Elf_Ehdr *)&fmp->md_data; - /* Figure out where to put it */ + /* + * Figure out where to put it. + * + * Linux does not allow to do kexec_load into + * any part of memory. Ask arch_loadaddr to + * resolve the first available chunk of physical + * memory where loading is possible (load_addr). + * + * Memory organization is shown below. + * It is assumed, that text segment offset of + * kernel ELF (KERNPHYSADDR) is non-zero, + * which is true for PPC/PPC64 architectures, + * where default is 0x100000. + * + * load_addr: trampoline code + * load_addr + KERNPHYSADDR: kernel text segment + */ trampolinebase = archsw.arch_loadaddr(LOAD_RAW, NULL, 0); - + printf("Load address at %#jx\n", (uintmax_t)trampolinebase); + printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset); + /* Set up loader trampoline */ trampoline = malloc(szkerneltramp); memcpy(trampoline, &kerneltramp, szkerneltramp); + /* Parse function descriptor for ELFv1 kernels */ if ((e->e_flags & 3) == 2) entry = e->e_entry; - else + else { archsw.arch_copyout(e->e_entry + elf64_relocation_offset, &entry, 8); - trampoline[2] = entry + elf64_relocation_offset; - trampoline[4] = 0; /* Phys. 
mem offset */ - trampoline[5] = 0; /* OF entry point */ + entry = be64toh(entry); + } + + /* + * Placeholder for trampoline data is at trampolinebase + 0x08 + * CAUTION: all data must be Big Endian + */ + trampoline_data = (void*)&trampoline[2]; + trampoline_data->kernel_entry = htobe32(entry + elf64_relocation_offset); + trampoline_data->phys_mem_offset = htobe32(0); + trampoline_data->of_entry = htobe32(0); if ((error = md_load64(fp->f_args, &mdp, &dtb)) != 0) return (error); - trampoline[3] = dtb; - trampoline[6] = mdp; - trampoline[7] = 0xfb5d104d; - printf("Kernel entry at %#jx (%#x) ...\n", e->e_entry, trampoline[2]); - printf("DTB at %#x, mdp at %#x\n", dtb, mdp); + trampoline_data->dtb = htobe32(dtb); + trampoline_data->mdp = htobe32(mdp); + trampoline_data->mdp_size = htobe32(0xfb5d104d); + + printf("Kernel entry at %#jx (%#x) ...\n", + entry, be32toh(trampoline_data->kernel_entry)); + printf("DTB at %#x, mdp at %#x\n", + be32toh(trampoline_data->dtb), be32toh(trampoline_data->mdp)); dev_cleanup(); archsw.arch_copyin(trampoline, trampolinebase, szkerneltramp); free(trampoline); - error = kexec_load(trampolinebase, nkexec_segments, &loaded_segments); + if (archsw.arch_kexec_kseg_get == NULL) + panic("architecture did not provide kexec segment mapping"); + archsw.arch_kexec_kseg_get(&nseg, &kseg); + + error = kexec_load(trampolinebase, nseg, (uintptr_t)kseg); if (error != 0) panic("kexec_load returned error: %d", error); + error = host_reboot(0xfee1dead, 672274793, - 0x45584543 /* LINUX_REBOOT_CMD_KEXEC */, NULL); + 0x45584543 /* LINUX_REBOOT_CMD_KEXEC */, (uintptr_t)NULL); if (error != 0) panic("reboot returned error: %d", error); - while (1) {} - panic("exec returned"); + while (1) {} } struct file_format ppc_elf64 =