From f2577f25c192695eb7449db7d733ec1803a9a4f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
Date: Thu, 19 Jul 2018 07:39:35 +0000
Subject: [PATCH] xen: add PVHv2 entry point

The PVHv2 entry point is fairly similar to the multiboot1 one. The
kernel is started in protected mode with paging disabled. More
information about the exact BSP state can be found in the pvh.markdown
document in the Xen tree.

This entry point is going to be joined with the native entry point at
hammer_time, and in order to do so the BSP needs to be bootstrapped
into long mode with the same set of page tables as used on bare metal.

Sponsored by: Citrix Systems R&D
---
 sys/amd64/amd64/xen-locore.S | 161 +++++++++++++++++++++++++++++++++++
 sys/xen/interface/elfnote.h  |  12 ++-
 2 files changed, 172 insertions(+), 1 deletion(-)

diff --git a/sys/amd64/amd64/xen-locore.S b/sys/amd64/amd64/xen-locore.S
index 06349fd90610..899499adda60 100644
--- a/sys/amd64/amd64/xen-locore.S
+++ b/sys/amd64/amd64/xen-locore.S
@@ -42,6 +42,12 @@
 
 #include "assym.inc"
 
+#define VTOP(x)		((x) - KERNBASE)
+#define ENTRY_SIZE	8 /* sizeof(uint64_t) */
+
+#define GDT_CODE	0x08
+#define GDT_DATA	0x10
+
 .section __xen_guest
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "FreeBSD")
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, __XSTRING(__FreeBSD_version))
@@ -57,6 +63,8 @@
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
 	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,	 .asciz, "yes")
+	/* For PVHv2 support. */
+	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  VTOP(xen_start32))
 
 	.text
 .p2align PAGE_SHIFT, 0x90	/* Hypercall_page needs to be PAGE aligned */
@@ -64,6 +72,7 @@
 NON_GPROF_ENTRY(hypercall_page)
 	.skip	0x1000, 0x90	/* Fill with "nop"s */
 
+/* Legacy PVH entry point, to be removed. */
 NON_GPROF_ENTRY(xen_start)
 	/* Don't trust what the loader gives for rflags. */
 	pushq	$PSL_KERNEL
@@ -85,3 +94,155 @@ NON_GPROF_ENTRY(xen_start)
 	/* NOTREACHED */
 0:	hlt
 	jmp	0b
+
+/*
+ * PVH entry point.
+ *
+ * Entered in 32bit protected mode with paging disabled, with %ebx
+ * holding the startup info. Bootstraps the BSP into long mode using
+ * the page tables built below and then calls hammer_time_xen and
+ * mi_startup. %ebx is left untouched until it is handed over.
+ *
+ * Paging is off on entry, so symbols are referenced through VTOP().
+ */
+	.code32
+NON_GPROF_ENTRY(xen_start32)
+
+	/* Load flat GDT */
+	movl	$VTOP(gdtdesc32), %eax
+	lgdt	(%eax)
+	ljmp	$GDT_CODE, $VTOP(reload_cs)
+
+reload_cs:
+	movw	$GDT_DATA, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %ss
+
+	movl	$VTOP(bootstack), %esp
+
+	/* Don't trust what the loader gives for eflags. */
+	pushl	$PSL_KERNEL
+	popfl
+
+	/*
+	 * Create the page tables.
+	 * The first 1GB is mapped using 2MB entries.
+	 *
+	 * %eax is the entry index. Every PT4 entry points to PT3 and
+	 * every PT3 entry points to PT2. Only the low 32 bits of each
+	 * entry are stored; the high 32 bits keep their initial zero.
+	 */
+	movl	$0, %eax
+pgbuild:
+	cmp	$(PAGE_SIZE/ENTRY_SIZE), %eax
+	jae	pgbuild_done
+
+	/* PT4[i] = VTOP(&PT3[0]) | PG_V | PG_RW | PG_U */
+	movl	$VTOP(PT4), %ecx
+	movl	$VTOP(PT3), %edx
+	orl	$(PG_V | PG_RW | PG_U), %edx
+	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
+
+	/* PT3[i] = VTOP(&PT2[0]) | PG_V | PG_RW | PG_U */
+	movl	$VTOP(PT3), %ecx
+	movl	$VTOP(PT2), %edx
+	orl	$(PG_V | PG_RW | PG_U), %edx
+	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
+
+	/* PT2[i] = i * 2MiB | PG_V | PG_RW | PG_PS | PG_U */
+	movl	$VTOP(PT2), %ecx
+	movl	%eax, %edx
+	shll	$PDRSHIFT, %edx
+	orl	$(PG_V | PG_RW | PG_PS | PG_U), %edx
+	movl	%edx, (%ecx,%eax,ENTRY_SIZE)
+
+	inc	%eax
+	jmp	pgbuild
+
+pgbuild_done:
+	/* Turn on EFER.LME */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	orl	$EFER_LME, %eax
+	wrmsr
+
+	/* Turn on PAE */
+	movl	%cr4, %eax
+	orl	$CR4_PAE, %eax
+	movl	%eax, %cr4
+
+	/* Set %cr3 for PT4 */
+	movl	$VTOP(PT4), %eax
+	movl	%eax, %cr3
+
+	/* Turn on paging (implicitly sets EFER.LMA) */
+	movl	%cr0, %eax
+	orl	$CR0_PG, %eax
+	movl	%eax, %cr0
+
+	/* Now we're in compatibility mode. Set %cs for long mode */
+	movl	$VTOP(gdtdesc), %eax
+	lgdt	(%eax)
+	ljmp	$GDT_CODE, $VTOP(longmode)
+
+	.code64
+longmode:
+	/* We're still running V=P, jump to entry point */
+	movq	$bootstack, %rsp
+	movq	$start_kernel, %rax
+	pushq	%rax
+	ret
+
+start_kernel:
+	/*
+	 * Pass %ebx as the argument to hammer_time_xen, it contains
+	 * the startup info. It was handed over in 32bit mode, so use a
+	 * 32bit move to zero extend it instead of trusting %rbx[63:32].
+	 */
+	movl	%ebx, %edi
+	call	hammer_time_xen
+	movq	%rax, %rsp
+	call	mi_startup
+
+	/* NOTREACHED */
+0:	hlt
+	jmp	0b
+
+/* Space for initial page tables */
+	.data
+	.p2align 12,0x40
+PT4:
+	.space	0x1000
+PT3:
+	.space	0x1000
+PT2:
+	.space	0x1000
+
+/* 64bit GDT */
+gdtdesc:
+	.word	gdtend - gdt - 1	# limit: table size minus one
+	.long	VTOP(gdt)		# low
+	.long	0			# high
+gdt:
+	.long	0			# null descriptor
+	.long	0
+	.long	0x00000000		# %cs
+	.long	0x00209800
+	.long	0x00000000		# %ds
+	.long	0x00008000
+gdtend:
+
+/* 32bit GDT */
+gdtdesc32:
+	.word	gdt32end - gdt32 - 1	# limit: table size minus one
+	.long	VTOP(gdt32)
+	.long	0
+gdt32:
+	.long	0			# null descriptor
+	.long	0
+	.long	0x0000ffff		# %cs
+	.long	0x00cf9a00
+	.long	0x0000ffff		# %ds, %es, %ss
+	.long	0x00cf9200
+gdt32end:
diff --git a/sys/xen/interface/elfnote.h b/sys/xen/interface/elfnote.h
index 3824a94572b5..353985fe58f3 100644
--- a/sys/xen/interface/elfnote.h
+++ b/sys/xen/interface/elfnote.h
@@ -199,10 +199,20 @@
  */
 #define XEN_ELFNOTE_SUPPORTED_FEATURES 17
 
+/*
+ * Physical entry point into the kernel.
+ *
+ * 32bit entry point into the kernel. When requested to launch the
+ * guest kernel in a HVM container, Xen will use this entry point to
+ * launch the guest in 32bit protected mode with paging disabled.
+ * Ignored otherwise.
+ */
+#define XEN_ELFNOTE_PHYS32_ENTRY 18
+
 /*
  * The number of the highest elfnote defined.
  */
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
 
 /*
  * System information exported through crash notes.