kasan: Create a shadow for the bootstack prior to hammer_time()

When the kernel is compiled with -asan-stack=true, the address sanitizer
will emit inline accesses to the shadow map.  In other words, some
shadow map accesses are not intercepted by the KASAN runtime, so they
cannot be disabled even if the runtime is not yet initialized by
kasan_init() at the end of hammer_time().
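
As a rough sketch of the failure mode (the constants and function names below are
illustrative placeholders, not FreeBSD's actual definitions or the compiler's exact
output), an inline 8-byte store check emitted under -asan-stack=true boils down to
the following; the shadow load is unconditional and never passes through the KASAN
runtime:

#include <stdint.h>

#define SHADOW_SCALE_SHIFT	3		/* 8 bytes of memory per shadow byte (usual KASAN scaling) */
#define SHADOW_OFFSET	0xdffffe0000000000ULL	/* placeholder shadow base, not FreeBSD's value */

void report_bad_access(uintptr_t addr, int size);	/* placeholder for the sanitizer report call */

static inline void
check_store8(void *p)
{
	/* Unconditional shadow read: there is no runtime hook that could skip it. */
	int8_t shad = *(int8_t *)(SHADOW_OFFSET +
	    ((uintptr_t)p >> SHADOW_SCALE_SHIFT));
	if (shad != 0)
		report_bad_access((uintptr_t)p, 8);
}

void
instrumented_store(uint64_t *p, uint64_t v)
{
	check_store8(p);	/* inserted by the compiler, not written by hand */
	*p = v;
}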

This went unnoticed because the loader will initialize all PML4 entries
of the bootstrap page table to point to the same PDP page, so early
shadow map accesses do not raise a page fault, though they are silently
corrupting memory.  In fact, when the loader does not copy the staging
area, we do get a page fault since in that case only the first and last
PML4Es are populated by the loader.  But due to another bug, the loader
always treated KASAN kernels as non-relocatable and thus always copied
the staging area.

It is not really practical to annotate hammer_time() and all callees
with __nosanitizeaddress, so instead add some early initialization which
creates a shadow for the boot stack used by hammer_time().  This is only
needed by KASAN, not by KMSAN, but the shared pmap code handles both.
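
For orientation when reading the diff below: the new locore.S stub amounts to the
following call, made while still on the temporary boot stack (a rough C paraphrase
using names from the change; the wrapper function itself is invented for
illustration).  kasan_init_early() forwards to kasan_md_init_early() and
kasan_shadow_map(), and the mapping ultimately lands in pmap_san_enter(), which
sees that kernphys is still 0 and takes the new pmap_san_enter_early() path.

#include <sys/types.h>		/* vm_offset_t */
#include <sys/asan.h>		/* kasan_init_early(), added by this change */

#define BOOTSTACK_SIZE	4096	/* mirrors the new define in locore.S */
extern char bootstack[];	/* label at the top of the temporary boot stack */

static void
shadow_boot_stack(void)
{
	/*
	 * The stack grows down from "bootstack", so shadow the
	 * BOOTSTACK_SIZE bytes ending at that label.
	 */
	kasan_init_early((vm_offset_t)bootstack - BOOTSTACK_SIZE,
	    BOOTSTACK_SIZE);
}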

Reported by:	mhorne
Reviewed by:	kib
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D35449
Mark Johnston 2022-06-15 10:48:16 -04:00
parent f6b799a86b
commit 756bc3adc5
8 changed files with 173 additions and 39 deletions

sys/amd64/amd64/locore.S

@@ -48,6 +48,8 @@
.set dmapbase,DMAP_MIN_ADDRESS
.set dmapend,DMAP_MAX_ADDRESS
#define BOOTSTACK_SIZE 4096
.text
/**********************************************************************
*
@@ -66,14 +68,22 @@ ENTRY(btext)
pushq $PSL_KERNEL
popfq
/* Find the metadata pointers before we lose them */
/* Get onto a stack that we can trust - there is no going back now. */
movq %rsp, %rbp
movq $bootstack,%rsp
#ifdef KASAN
/* Bootstrap a shadow map for the boot stack. */
movq $bootstack, %rdi
subq $BOOTSTACK_SIZE, %rdi
movq $BOOTSTACK_SIZE, %rsi
call kasan_init_early
#endif
/* Grab metadata pointers from the loader. */
movl 4(%rbp),%edi /* modulep (arg 1) */
movl 8(%rbp),%esi /* kernend (arg 2) */
/* Get onto a stack that we can trust - there is no going back now. */
movq $bootstack,%rsp
xorl %ebp, %ebp
xorq %rbp, %rbp
call hammer_time /* set up cpu for unix operation */
movq %rax,%rsp /* set up kstack for mi_startup() */
@@ -140,5 +150,5 @@ ENTRY(la57_trampoline_end)
.bss
ALIGN_DATA /* just to be sure */
.globl bootstack
.space 0x1000 /* space for bootstack - temporary stack */
.space BOOTSTACK_SIZE /* space for bootstack - temporary stack */
bootstack:

sys/amd64/amd64/machdep.c

@@ -1260,16 +1260,43 @@ amd64_bsp_ist_init(struct pcpu *pc)
tssp->tss_ist4 = (long)np;
}
/*
* Calculate the kernel load address by inspecting page table created by loader.
* The assumptions:
* - kernel is mapped at KERNBASE, backed by contiguous phys memory
* aligned at 2M, below 4G (the latter is important for AP startup)
* - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M)
* - kernel is mapped with 2M superpages
* - all participating memory, i.e. kernel, modules, metadata,
* page table is accessible by pre-created 1:1 mapping
* (right now loader creates 1:1 mapping for lower 4G, and all
* memory is from there)
* - there is a usable memory block right after the end of the
* mapped kernel and all modules/metadata, pointed to by
* physfree, for early allocations
*/
vm_paddr_t __nosanitizeaddress __nosanitizememory
amd64_loadaddr(void)
{
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde;
uint64_t cr3;
cr3 = rcr3();
pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART);
pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART);
pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART);
return (*pde & PG_FRAME);
}
u_int64_t
hammer_time(u_int64_t modulep, u_int64_t physfree)
{
caddr_t kmdp;
int gsel_tss, x;
struct pcpu *pc;
uint64_t cr3, rsp0;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde;
uint64_t rsp0;
char *env;
struct user_segment_descriptor *gdt;
struct region_descriptor r_gdt;
@@ -1278,34 +1305,9 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
TSRAW(&thread0, TS_ENTER, __func__, NULL);
/*
* Calculate kernphys by inspecting page table created by loader.
* The assumptions:
* - kernel is mapped at KERNBASE, backed by contiguous phys memory
* aligned at 2M, below 4G (the latter is important for AP startup)
* - there is a 2M hole at KERNBASE
* - kernel is mapped with 2M superpages
* - all participating memory, i.e. kernel, modules, metadata,
* page table is accessible by pre-created 1:1 mapping
* (right now loader creates 1:1 mapping for lower 4G, and all
* memory is from there)
* - there is a usable memory block right after the end of the
* mapped kernel and all modules/metadata, pointed to by
* physfree, for early allocations
*/
cr3 = rcr3();
pml4e = (pml4_entry_t *)(cr3 & ~PAGE_MASK) + pmap_pml4e_index(
(vm_offset_t)hammer_time);
pdpe = (pdp_entry_t *)(*pml4e & ~PAGE_MASK) + pmap_pdpe_index(
(vm_offset_t)hammer_time);
pde = (pd_entry_t *)(*pdpe & ~PAGE_MASK) + pmap_pde_index(
(vm_offset_t)hammer_time);
kernphys = (vm_paddr_t)(*pde & ~PDRMASK) -
(vm_paddr_t)(((vm_offset_t)hammer_time - KERNBASE) & ~PDRMASK);
kernphys = amd64_loadaddr();
/* Fix-up for 2M hole */
physfree += kernphys;
kernphys += NBPDR;
kmdp = init_ops.parse_preload_data(modulep);

sys/amd64/amd64/pmap.c

@@ -11429,6 +11429,107 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
}
#if defined(KASAN) || defined(KMSAN)
/*
* Reserve enough memory to:
* 1) allocate PDP pages for the shadow map(s),
* 2) shadow one page of memory, so one PD page, one PT page, and one shadow
* page per shadow map.
*/
#ifdef KASAN
#define SAN_EARLY_PAGES (NKASANPML4E + 3)
#else
#define SAN_EARLY_PAGES (NKMSANSHADPML4E + NKMSANORIGPML4E + 2 * 3)
#endif
static uint64_t __nosanitizeaddress __nosanitizememory
pmap_san_enter_early_alloc_4k(uint64_t pabase)
{
static uint8_t data[PAGE_SIZE * SAN_EARLY_PAGES] __aligned(PAGE_SIZE);
static size_t offset = 0;
uint64_t pa;
if (offset == sizeof(data)) {
panic("%s: ran out of memory for the bootstrap shadow map",
__func__);
}
pa = pabase + ((vm_offset_t)&data[offset] - KERNSTART);
offset += PAGE_SIZE;
return (pa);
}
/*
* Map a shadow page, before the kernel has bootstrapped its page tables. This
* is currently only used to shadow the temporary boot stack set up by locore.
*/
static void __nosanitizeaddress __nosanitizememory
pmap_san_enter_early(vm_offset_t va)
{
static bool first = true;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde;
pt_entry_t *pte;
uint64_t cr3, pa, base;
int i;
base = amd64_loadaddr();
cr3 = rcr3();
if (first) {
/*
* If this is the first call, we need to allocate new PML4Es for
* the bootstrap shadow map(s). We don't know how the PML4 page
* was initialized by the boot loader, so we can't simply test
* whether the shadow map's PML4Es are zero.
*/
first = false;
#ifdef KASAN
for (i = 0; i < NKASANPML4E; i++) {
pa = pmap_san_enter_early_alloc_4k(base);
pml4e = (pml4_entry_t *)cr3 +
pmap_pml4e_index(KASAN_MIN_ADDRESS + i * NBPML4);
*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
}
#else
for (i = 0; i < NKMSANORIGPML4E; i++) {
pa = pmap_san_enter_early_alloc_4k(base);
pml4e = (pml4_entry_t *)cr3 +
pmap_pml4e_index(KMSAN_ORIG_MIN_ADDRESS +
i * NBPML4);
*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
}
for (i = 0; i < NKMSANSHADPML4E; i++) {
pa = pmap_san_enter_early_alloc_4k(base);
pml4e = (pml4_entry_t *)cr3 +
pmap_pml4e_index(KMSAN_SHAD_MIN_ADDRESS +
i * NBPML4);
*pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V);
}
#endif
}
pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(va);
pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(va);
if (*pdpe == 0) {
pa = pmap_san_enter_early_alloc_4k(base);
*pdpe = (pdp_entry_t)(pa | X86_PG_RW | X86_PG_V);
}
pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(va);
if (*pde == 0) {
pa = pmap_san_enter_early_alloc_4k(base);
*pde = (pd_entry_t)(pa | X86_PG_RW | X86_PG_V);
}
pte = (pt_entry_t *)(*pde & PG_FRAME) + pmap_pte_index(va);
if (*pte != 0)
panic("%s: PTE for %#lx is already initialized", __func__, va);
pa = pmap_san_enter_early_alloc_4k(base);
*pte = (pt_entry_t)(pa | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V);
}
static vm_page_t
pmap_san_enter_alloc_4k(void)
{
@@ -11452,7 +11553,7 @@ pmap_san_enter_alloc_2m(void)
* Grow a shadow map by at least one 4KB page at the specified address. Use 2MB
* pages when possible.
*/
void
void __nosanitizeaddress __nosanitizememory
pmap_san_enter(vm_offset_t va)
{
pdp_entry_t *pdpe;
@@ -11460,6 +11561,14 @@ pmap_san_enter(vm_offset_t va)
pt_entry_t *pte;
vm_page_t m;
if (kernphys == 0) {
/*
* We're creating a temporary shadow map for the boot stack.
*/
pmap_san_enter_early(va);
return;
}
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
pdpe = pmap_pdpe(kernel_pmap, va);

sys/amd64/include/asan.h

@@ -66,6 +66,12 @@ kasan_md_init(void)
{
}
static inline void
kasan_md_init_early(vm_offset_t bootstack, size_t size)
{
kasan_shadow_map(bootstack, size);
}
#endif /* KASAN */
#endif /* !_MACHINE_ASAN_H_ */

sys/amd64/include/md_var.h

@@ -64,6 +64,7 @@ struct sysentvec;
void amd64_conf_fast_syscall(void);
void amd64_db_resume_dbreg(void);
vm_paddr_t amd64_loadaddr(void);
void amd64_lower_shared_page(struct sysentvec *);
void amd64_bsp_pcpu_init1(struct pcpu *pc);
void amd64_bsp_pcpu_init2(uint64_t rsp0);

sys/amd64/include/pmap.h

@@ -533,6 +533,7 @@ void pmap_page_array_startup(long count);
vm_page_t pmap_page_alloc_below_4g(bool zeroed);
#if defined(KASAN) || defined(KMSAN)
void pmap_san_bootstrap(void);
void pmap_san_enter(vm_offset_t);
#endif

sys/kern/subr_asan.c

@@ -139,6 +139,12 @@ kasan_init(void)
kasan_enabled = true;
}
void
kasan_init_early(vm_offset_t stack, size_t size)
{
kasan_md_init_early(stack, size);
}
static inline const char *
kasan_code_name(uint8_t code)
{

sys/sys/asan.h

@@ -56,11 +56,10 @@
#define KASAN_EXEC_ARGS_FREED 0xFF
void kasan_init(void);
void kasan_init_early(vm_offset_t, size_t);
void kasan_shadow_map(vm_offset_t, size_t);
void kasan_mark(const void *, size_t, size_t, uint8_t);
#else /* KASAN */
#define kasan_early_init(u)
#define kasan_init()
#define kasan_shadow_map(a, s)
#define kasan_mark(p, s, l, c)