From 8978608832c28572bbf5adadb9cfb077e8f15255 Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 10 Aug 2021 16:52:36 -0400 Subject: [PATCH] amd64: Populate the KMSAN shadow maps and integrate with the VM - During boot, allocate PDP pages for the shadow maps. The region above KERNBASE is currently not shadowed. - Create a dummy shadow for the vm page array. For now, this array is not protected by the shadow map to help reduce kernel memory usage. - Grow shadows when growing the kernel map. - Increase the default kernel stack size when KMSAN is enabled. As with KASAN, sanitizer instrumentation appears to create stack frames large enough that the default value is not sufficient. - Disable UMA's use of the direct map when KMSAN is configured. KMSAN cannot validate the direct map. - Disable unmapped I/O when KMSAN configured. - Lower the limit on paging buffers when KMSAN is configured. Each buffer has a static MAXPHYS-sized allocation of KVA, which in turn eats 2*MAXPHYS of space in the shadow map. Reviewed by: alc, kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D31295 --- sys/amd64/amd64/pmap.c | 122 +++++++++++++++++++++++++++++++++++- sys/amd64/include/param.h | 2 +- sys/amd64/include/vmparam.h | 2 +- sys/kern/kern_malloc.c | 8 ++- sys/kern/vfs_bio.c | 15 ++++- sys/vm/vm_pager.c | 9 +++ 6 files changed, 148 insertions(+), 10 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index d7a5e16d3dd6..aae35c5d7e07 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -122,6 +122,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -161,8 +162,8 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include +#include #include #include #ifdef SMP @@ -430,6 +431,17 @@ u_int64_t KPML5phys; /* phys addr of kernel level 5, #ifdef KASAN static uint64_t KASANPDPphys; #endif +#ifdef KMSAN +static uint64_t KMSANSHADPDPphys; +static uint64_t KMSANORIGPDPphys; + +/* + * To support systems with large amounts of memory, it is necessary to extend + * the maximum size of the direct map. This could eat into the space reserved + * for the shadow map. + */ +_Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow"); +#endif static pml4_entry_t *kernel_pml4; static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ @@ -1682,13 +1694,21 @@ create_pagetables(vm_paddr_t *firstaddr) DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g); dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; - /* Allocate pages */ + /* Allocate pages. */ KPML4phys = allocpages(firstaddr, 1); KPDPphys = allocpages(firstaddr, NKPML4E); #ifdef KASAN KASANPDPphys = allocpages(firstaddr, NKASANPML4E); KASANPDphys = allocpages(firstaddr, 1); #endif +#ifdef KMSAN + /* + * The KMSAN shadow maps are initially left unpopulated, since there is + * no need to shadow memory above KERNBASE. + */ + KMSANSHADPDPphys = allocpages(firstaddr, NKMSANSHADPML4E); + KMSANORIGPDPphys = allocpages(firstaddr, NKMSANORIGPML4E); +#endif /* * Allocate the initial number of kernel page table pages required to @@ -1826,6 +1846,20 @@ create_pagetables(vm_paddr_t *firstaddr) } #endif +#ifdef KMSAN + /* Connect the KMSAN shadow map slots up to the PML4. */ + for (i = 0; i < NKMSANSHADPML4E; i++) { + p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa(i); + p4_p[KMSANSHADPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; + } + + /* Connect the KMSAN origin map slots up to the PML4. 
*/ + for (i = 0; i < NKMSANORIGPML4E; i++) { + p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa(i); + p4_p[KMSANORIGPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; + } +#endif + /* Connect the Direct Map slots up to the PML4. */ for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); @@ -2498,6 +2532,14 @@ pmap_init(void) TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents); if (lm_ents > LMEPML4I - LMSPML4I + 1) lm_ents = LMEPML4I - LMSPML4I + 1; +#ifdef KMSAN + if (lm_ents > KMSANORIGPML4I - LMSPML4I) { + printf( + "pmap: shrinking large map for KMSAN (%d slots to %ld slots)\n", + lm_ents, KMSANORIGPML4I - LMSPML4I); + lm_ents = KMSANORIGPML4I - LMSPML4I; + } +#endif if (bootverbose) printf("pmap: large map %u PML4 slots (%lu GB)\n", lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024)); @@ -4186,6 +4228,16 @@ pmap_pinit_pml4(vm_page_t pml4pg) pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | pg_nx; } +#endif +#ifdef KMSAN + for (i = 0; i < NKMSANSHADPML4E; i++) { + pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa(i)) | + X86_PG_RW | X86_PG_V | pg_nx; + } + for (i = 0; i < NKMSANORIGPML4E; i++) { + pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa(i)) | + X86_PG_RW | X86_PG_V | pg_nx; + } #endif for (i = 0; i < ndmpdpphys; i++) { pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | @@ -4772,6 +4824,12 @@ pmap_release(pmap_t pmap) #ifdef KASAN for (i = 0; i < NKASANPML4E; i++) /* KASAN shadow map */ pmap->pm_pmltop[KASANPML4I + i] = 0; +#endif +#ifdef KMSAN + for (i = 0; i < NKMSANSHADPML4E; i++) /* KMSAN shadow map */ + pmap->pm_pmltop[KMSANSHADPML4I + i] = 0; + for (i = 0; i < NKMSANORIGPML4E; i++) /* KMSAN shadow map */ + pmap->pm_pmltop[KMSANORIGPML4I + i] = 0; #endif for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pmltop[DMPML4I + i] = 0; @@ -4814,6 +4872,60 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, kvm_free, "LU", "Amount of KVM free"); +#ifdef KMSAN +static void +pmap_kmsan_shadow_map_page_array(vm_paddr_t pdppa, vm_size_t size) +{ + pdp_entry_t *pdpe; + pd_entry_t *pde; + pt_entry_t *pte; + vm_paddr_t dummypa, dummypd, dummypt; + int i, npde, npdpg; + + npdpg = howmany(size, NBPDP); + npde = size / NBPDR; + + dummypa = vm_phys_early_alloc(-1, PAGE_SIZE); + pagezero((void *)PHYS_TO_DMAP(dummypa)); + + dummypt = vm_phys_early_alloc(-1, PAGE_SIZE); + pagezero((void *)PHYS_TO_DMAP(dummypt)); + dummypd = vm_phys_early_alloc(-1, PAGE_SIZE * npdpg); + for (i = 0; i < npdpg; i++) + pagezero((void *)PHYS_TO_DMAP(dummypd + ptoa(i))); + + pte = (pt_entry_t *)PHYS_TO_DMAP(dummypt); + for (i = 0; i < NPTEPG; i++) + pte[i] = (pt_entry_t)(dummypa | X86_PG_V | X86_PG_RW | + X86_PG_A | X86_PG_M | pg_nx); + + pde = (pd_entry_t *)PHYS_TO_DMAP(dummypd); + for (i = 0; i < npde; i++) + pde[i] = (pd_entry_t)(dummypt | X86_PG_V | X86_PG_RW | pg_nx); + + pdpe = (pdp_entry_t *)PHYS_TO_DMAP(pdppa); + for (i = 0; i < npdpg; i++) + pdpe[i] = (pdp_entry_t)(dummypd + ptoa(i) | X86_PG_V | + X86_PG_RW | pg_nx); +} + +static void +pmap_kmsan_page_array_startup(vm_offset_t start, vm_offset_t end) +{ + vm_size_t size; + + KASSERT(start % NBPDP == 0, ("unaligned page array start address")); + + /* + * The end of the page array's KVA region is 2MB aligned, see + * kmem_init(). 
+ */ + size = round_2mpage(end) - start; + pmap_kmsan_shadow_map_page_array(KMSANSHADPDPphys, size); + pmap_kmsan_shadow_map_page_array(KMSANORIGPDPphys, size); +} +#endif + /* * Allocate physical memory for the vm_page array and map it into KVA, * attempting to back the vm_pages with domain-local memory. @@ -4854,6 +4966,10 @@ pmap_page_array_startup(long pages) pde_store(pde, newpdir); } vm_page_array = (vm_page_t)start; + +#ifdef KMSAN + pmap_kmsan_page_array_startup(start, end); +#endif } /* @@ -4892,6 +5008,8 @@ pmap_growkernel(vm_offset_t addr) addr = vm_map_max(kernel_map); if (kernel_vm_end < addr) kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end); + if (kernel_vm_end < addr) + kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end); while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & X86_PG_V) == 0) { diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index cf1d2bd0a586..a76be23bbe91 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -134,7 +134,7 @@ #define IOPERM_BITMAP_SIZE (IOPAGES * PAGE_SIZE + 1) #ifndef KSTACK_PAGES -#ifdef KASAN +#if defined(KASAN) || defined(KMSAN) #define KSTACK_PAGES 6 #else #define KSTACK_PAGES 4 /* pages of kstack (with pcb) */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 61d0dea54210..6cb8b3f0071a 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -75,7 +75,7 @@ * of the direct mapped segment. This uses 2MB pages for reduced * TLB pressure. */ -#ifndef KASAN +#if !defined(KASAN) && !defined(KMSAN) #define UMA_MD_SMALL_ALLOC #endif diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index 0c2f1689d194..b30139830a1b 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1175,13 +1175,15 @@ kmeminit(void) vm_kmem_size = round_page(vm_kmem_size); -#ifdef KASAN /* - * With KASAN enabled, dynamically allocated kernel memory is shadowed. - * Account for this when setting the UMA limit. + * With KASAN or KMSAN enabled, dynamically allocated kernel memory is + * shadowed. Account for this when setting the UMA limit. */ +#if defined(KASAN) vm_kmem_size = (vm_kmem_size * KASAN_SHADOW_SCALE) / (KASAN_SHADOW_SCALE + 1); +#elif defined(KMSAN) + vm_kmem_size /= 3; #endif #ifdef DEBUG_MEMGUARD diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 859ce3b58285..174892b374d1 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1044,13 +1044,22 @@ kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est) int tuned_nbuf; long maxbuf, maxbuf_sz, buf_sz, biotmap_sz; -#ifdef KASAN /* - * With KASAN enabled, the kernel map is shadowed. Account for this - * when sizing maps based on the amount of physical memory available. + * With KASAN or KMSAN enabled, the kernel map is shadowed. Account for + * this when sizing maps based on the amount of physical memory + * available. */ +#if defined(KASAN) physmem_est = (physmem_est * KASAN_SHADOW_SCALE) / (KASAN_SHADOW_SCALE + 1); +#elif defined(KMSAN) + physmem_est /= 3; + + /* + * KMSAN cannot reliably determine whether buffer data is initialized + * unless it is updated through a KVA mapping. 
+ */ + unmapped_buf_allowed = 0; #endif /* diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 640e3d977e99..69f0a2dc2bbb 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -217,6 +217,15 @@ pbuf_zsecond_create(const char *name, int max) zone = uma_zsecond_create(name, pbuf_ctor, pbuf_dtor, NULL, NULL, pbuf_zone); + +#ifdef KMSAN + /* + * Shrink the size of the pbuf pools if KMSAN is enabled, otherwise the + * shadows of the large KVA allocations eat up too much memory. + */ + max /= 3; +#endif + /* * uma_prealloc() rounds up to items per slab. If we would prealloc * immediately on every pbuf_zsecond_create(), we may accumulate too