diff --git a/share/man/man9/malloc.9 b/share/man/man9/malloc.9
index 82294a6897f5..26ea111641c2 100644
--- a/share/man/man9/malloc.9
+++ b/share/man/man9/malloc.9
@@ -29,7 +29,7 @@
 .\" $NetBSD: malloc.9,v 1.3 1996/11/11 00:05:11 lukem Exp $
 .\" $FreeBSD$
 .\"
-.Dd January 24, 2018
+.Dd June 13, 2018
 .Dt MALLOC 9
 .Os
 .Sh NAME
@@ -189,6 +189,11 @@ This option should only be used in combination with
 .Dv M_NOWAIT
 when an allocation failure cannot be tolerated by the caller without
 catastrophic effects on the system.
+.It Dv M_EXEC
+Indicates that the system should allocate executable memory.
+If this flag is not set, the system will not allocate executable memory.
+Not all platforms enforce a distinction between executable and
+non-executable memory.
 .El
 .Pp
 Exactly one of either
diff --git a/share/man/man9/zone.9 b/share/man/man9/zone.9
index 0e8d7838e193..76dea4bae189 100644
--- a/share/man/man9/zone.9
+++ b/share/man/man9/zone.9
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 26, 2017
+.Dd June 13, 2018
 .Dt ZONE 9
 .Os
 .Sh NAME
@@ -375,6 +375,15 @@
 if the zone ran out of unused items and
 .Dv M_NOWAIT
 was specified.
+.Sh IMPLEMENTATION NOTES
+The memory that these allocation calls return is not executable.
+The
+.Fn uma_zalloc
+function does not support the
+.Dv M_EXEC
+flag to allocate executable memory.
+Not all platforms enforce a distinction between executable and
+non-executable memory.
 .Sh SEE ALSO
 .Xr malloc 9
 .Sh HISTORY
diff --git a/sys/amd64/amd64/bpf_jit_machdep.c b/sys/amd64/amd64/bpf_jit_machdep.c
index 8d4ddaa24135..d65978e9c84c 100644
--- a/sys/amd64/amd64/bpf_jit_machdep.c
+++ b/sys/amd64/amd64/bpf_jit_machdep.c
@@ -44,9 +44,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
-#include
-#include
-#include
 #else
 #include
 #include
@@ -605,11 +602,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
 
 	*size = stream.cur_ip;
 #ifdef _KERNEL
-	/*
-	 * We cannot use malloc(9) because DMAP is mapped as NX.
-	 */
-	stream.ibuf = (void *)kmem_malloc(kernel_arena, *size,
-	    M_NOWAIT);
+	stream.ibuf = malloc(*size, M_BPFJIT, M_EXEC | M_NOWAIT);
 	if (stream.ibuf == NULL)
 		break;
 #else
@@ -658,14 +651,3 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
 
 	return ((bpf_filter_func)(void *)stream.ibuf);
 }
-
-void
-bpf_jit_free(void *func, size_t size)
-{
-
-#ifdef _KERNEL
-	kmem_free(kernel_arena, (vm_offset_t)func, size);
-#else
-	munmap(func, size);
-#endif
-}
diff --git a/sys/i386/i386/bpf_jit_machdep.c b/sys/i386/i386/bpf_jit_machdep.c
index ff3fa3fb0750..6d77d0ba6f30 100644
--- a/sys/i386/i386/bpf_jit_machdep.c
+++ b/sys/i386/i386/bpf_jit_machdep.c
@@ -632,7 +632,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
 
 	*size = stream.cur_ip;
 #ifdef _KERNEL
-	stream.ibuf = malloc(*size, M_BPFJIT, M_NOWAIT);
+	stream.ibuf = malloc(*size, M_BPFJIT, M_EXEC | M_NOWAIT);
 	if (stream.ibuf == NULL)
 		break;
 #else
@@ -681,14 +681,3 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, size_t *size)
 
 	return ((bpf_filter_func)(void *)stream.ibuf);
 }
-
-void
-bpf_jit_free(void *func, size_t size)
-{
-
-#ifdef _KERNEL
-	free(func, M_BPFJIT);
-#else
-	munmap(func, size);
-#endif
-}
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 50dfe2507b62..3bc0e6ee0d11 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -564,7 +564,7 @@ void *
 		return (va);
 #endif
 
-	if (size <= kmem_zmax) {
+	if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
 		if (size & KMEM_ZMASK)
 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
 		indx = kmemsize[size >> KMEM_ZSHIFT];
@@ -609,7 +609,7 @@ malloc_domain(size_t size, struct malloc_type *mtp, int domain,
 	if (malloc_dbg(&va, &size, mtp, flags) != 0)
 		return (va);
 #endif
-	if (size <= kmem_zmax) {
+	if (size <= kmem_zmax && (flags & M_EXEC) == 0) {
 		if (size & KMEM_ZMASK)
 			size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
 		indx = kmemsize[size >> KMEM_ZSHIFT];
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 6e6ce3ee6b2c..92da3f9a8577 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -241,6 +241,9 @@ static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
 /* kernel and kmem arenas are aliased for backwards KPI compat. */
 vmem_t *kernel_arena = &kernel_arena_storage;
+#if VM_NRESERVLEVEL > 0
+vmem_t *kernel_rwx_arena = NULL;
+#endif
 vmem_t *kmem_arena = &kernel_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;
diff --git a/sys/net/bpf_jitter.c b/sys/net/bpf_jitter.c
index 276c10dd57fb..abe7bfa465fd 100644
--- a/sys/net/bpf_jitter.c
+++ b/sys/net/bpf_jitter.c
@@ -101,11 +101,13 @@ void
 bpf_destroy_jit_filter(bpf_jit_filter *filter)
 {
 
-	if (filter->func != bpf_jit_accept_all)
-		bpf_jit_free(filter->func, filter->size);
 #ifdef _KERNEL
+	if (filter->func != bpf_jit_accept_all)
+		free(filter->func, M_BPFJIT);
 	free(filter, M_BPFJIT);
 #else
+	if (filter->func != bpf_jit_accept_all)
+		munmap(filter->func, filter->size);
 	free(filter);
 #endif
 }
diff --git a/sys/net/bpf_jitter.h b/sys/net/bpf_jitter.h
index a7c7cd9f7320..23049d148567 100644
--- a/sys/net/bpf_jitter.h
+++ b/sys/net/bpf_jitter.h
@@ -88,6 +88,5 @@ void bpf_destroy_jit_filter(bpf_jit_filter *filter);
 
 struct bpf_insn;
 bpf_filter_func bpf_jit_compile(struct bpf_insn *, u_int, size_t *);
-void bpf_jit_free(void *, size_t);
 
 #endif /* _NET_BPF_JITTER_H_ */
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index c8edd207cdcc..e9908f558a19 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -49,7 +49,7 @@
 #define MINALLOCSIZE	UMA_SMALLEST_UNIT
 
 /*
- * flags to malloc.
+ * Flags to memory allocation functions.
  */
 #define	M_NOWAIT	0x0001		/* do not block */
 #define	M_WAITOK	0x0002		/* ok to block */
@@ -59,6 +59,7 @@
 #define	M_NODUMP	0x0800		/* don't dump pages in this allocation */
 #define	M_FIRSTFIT	0x1000		/* Only for vmem, fast fit. */
 #define	M_BESTFIT	0x2000		/* Only for vmem, low fragmentation. */
+#define	M_EXEC		0x4000		/* allocate executable space. */
 
 #define	M_MAGIC		877983977	/* time when first defined :-) */
 
diff --git a/sys/vm/uma.h b/sys/vm/uma.h
index 9fe51e515f4a..ed33ae030728 100644
--- a/sys/vm/uma.h
+++ b/sys/vm/uma.h
@@ -617,11 +617,12 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef);
  * These flags are setable in the allocf and visible in the freef.
  */
 #define UMA_SLAB_BOOT	0x01		/* Slab alloced from boot pages */
+#define UMA_SLAB_KRWX	0x02		/* Slab alloced from kernel_rwx_arena */
 #define UMA_SLAB_KERNEL	0x04		/* Slab alloced from kernel_map */
 #define UMA_SLAB_PRIV	0x08		/* Slab alloced from priv allocator */
 #define UMA_SLAB_OFFP	0x10		/* Slab is managed separately */
 #define UMA_SLAB_MALLOC	0x20		/* Slab is a large malloc slab */
-/* 0x02, 0x40 and 0x80 are available */
+/* 0x40 and 0x80 are available */
 
 /*
  * Used to pre-fill a zone with some number of items
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index c44b10cd3f14..16a702cb4aa7 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -1167,7 +1167,7 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
 	void *p;	/* Returned page */
 
 	*pflag = UMA_SLAB_KERNEL;
-	p = (void *) kmem_malloc_domain(domain, bytes, wait);
+	p = (void *) kmem_malloc_domain(kernel_arena, domain, bytes, wait);
 
 	return (p);
 }
@@ -2280,6 +2280,7 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
 	}
+	KASSERT((flags & M_EXEC) == 0, ("uma_zalloc_arg: called with M_EXEC"));
 	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
 	    ("uma_zalloc_arg: called with spinlock or critical section held"));
 	if (zone->uz_flags & UMA_ZONE_PCPU)
@@ -3587,20 +3588,34 @@ uma_zone_exhausted_nolock(uma_zone_t zone)
 void *
 uma_large_malloc_domain(vm_size_t size, int domain, int wait)
 {
+	struct vmem *arena;
 	vm_offset_t addr;
 	uma_slab_t slab;
 
+#if VM_NRESERVLEVEL > 0
+	if (__predict_true((wait & M_EXEC) == 0))
+		arena = kernel_arena;
+	else
+		arena = kernel_rwx_arena;
+#else
+	arena = kernel_arena;
+#endif
+
 	slab = zone_alloc_item(slabzone, NULL, domain, wait);
 	if (slab == NULL)
 		return (NULL);
 	if (domain == UMA_ANYDOMAIN)
-		addr = kmem_malloc(kernel_arena, size, wait);
+		addr = kmem_malloc(arena, size, wait);
 	else
-		addr = kmem_malloc_domain(domain, size, wait);
+		addr = kmem_malloc_domain(arena, domain, size, wait);
 	if (addr != 0) {
 		vsetslab(addr, slab);
 		slab->us_data = (void *)addr;
 		slab->us_flags = UMA_SLAB_KERNEL | UMA_SLAB_MALLOC;
+#if VM_NRESERVLEVEL > 0
+		if (__predict_false(arena == kernel_rwx_arena))
+			slab->us_flags |= UMA_SLAB_KRWX;
+#endif
 		slab->us_size = size;
 		slab->us_domain = vm_phys_domain(PHYS_TO_VM_PAGE(
 		    pmap_kextract(addr)));
@@ -3622,10 +3637,19 @@ uma_large_malloc(vm_size_t size, int wait)
 void
 uma_large_free(uma_slab_t slab)
 {
+	struct vmem *arena;
 
 	KASSERT((slab->us_flags & UMA_SLAB_KERNEL) != 0,
 	    ("uma_large_free: Memory not allocated with uma_large_malloc."));
-	kmem_free(kernel_arena, (vm_offset_t)slab->us_data, slab->us_size);
+#if VM_NRESERVLEVEL > 0
+	if (__predict_true((slab->us_flags & UMA_SLAB_KRWX) == 0))
+		arena = kernel_arena;
+	else
+		arena = kernel_rwx_arena;
+#else
+	arena = kernel_arena;
+#endif
+	kmem_free(arena, (vm_offset_t)slab->us_data, slab->us_size);
 	uma_total_dec(slab->us_size);
 	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
 }
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index fde50f4a26ef..22f945b197dc 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -65,7 +65,8 @@ vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
 vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
-vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
+vm_offset_t kmem_malloc_domain(struct vmem *, int domain, vm_size_t size,
+    int flags);
 void kmem_free(struct vmem *, vm_offset_t, vm_size_t);
 
 /* This provides memory for previously allocated address space. */
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index 0b721d0acce9..0a1af123ddbc 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -135,6 +135,23 @@ kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp)
 	return (0);
 }
 
+#if VM_NRESERVLEVEL > 0
+/*
+ * Import a superpage from the normal kernel arena into the special
+ * arena for allocations with different permissions.
+ */
+static int
+kernel_rwx_alloc(void *arena, vmem_size_t size, int flags, vmem_addr_t *addrp)
+{
+
+	KASSERT((size % KVA_QUANTUM) == 0,
+	    ("kernel_rwx_alloc: Size %jd is not a multiple of %d",
+	    (intmax_t)size, (int)KVA_QUANTUM));
+	return (vmem_xalloc(arena, size, KVA_QUANTUM, 0, 0, VMEM_ADDR_MIN,
+	    VMEM_ADDR_MAX, flags, addrp));
+}
+#endif
+
 /*
  * vm_init initializes the virtual memory system.
  * This is done only by the first cpu up.
@@ -173,12 +190,31 @@ vm_mem_init(dummy)
 	vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
 	vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
 
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * In an architecture with superpages, maintain a separate arena
+	 * for allocations with permissions that differ from the "standard"
+	 * read/write permissions used for memory in the kernel_arena.
+	 */
+	kernel_rwx_arena = vmem_create("kernel rwx arena", 0, 0, PAGE_SIZE,
+	    0, M_WAITOK);
+	vmem_set_import(kernel_rwx_arena, kernel_rwx_alloc,
+	    (vmem_release_t *)vmem_xfree, kernel_arena, KVA_QUANTUM);
+#endif
+
 	for (domain = 0; domain < vm_ndomains; domain++) {
 		vm_dom[domain].vmd_kernel_arena = vmem_create(
 		    "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
 		vmem_set_import(vm_dom[domain].vmd_kernel_arena,
 		    (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
 		    KVA_QUANTUM);
+#if VM_NRESERVLEVEL > 0
+		vm_dom[domain].vmd_kernel_rwx_arena = vmem_create(
+		    "kernel rwx arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+		vmem_set_import(vm_dom[domain].vmd_kernel_rwx_arena,
+		    kernel_rwx_alloc, (vmem_release_t *)vmem_xfree,
+		    vm_dom[domain].vmd_kernel_arena, KVA_QUANTUM);
+#endif
 	}
 
 #ifndef UMA_MD_SMALL_ALLOC
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 0ff1b155c069..037db5723396 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -212,8 +212,8 @@ kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
-		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
-		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
+		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_RW,
+		    VM_PROT_RW | PMAP_ENTER_WIRED, 0);
 	}
 	VM_OBJECT_WUNLOCK(object);
 	return (addr);
@@ -298,8 +298,8 @@ kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
-		pmap_enter(kernel_pmap, tmp, m, VM_PROT_ALL,
-		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
+		pmap_enter(kernel_pmap, tmp, m, VM_PROT_RW,
+		    VM_PROT_RW | PMAP_ENTER_WIRED, 0);
 		tmp += PAGE_SIZE;
 	}
 	VM_OBJECT_WUNLOCK(object);
@@ -372,20 +372,32 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
  *	Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
-kmem_malloc_domain(int domain, vm_size_t size, int flags)
+kmem_malloc_domain(struct vmem *vmem, int domain, vm_size_t size, int flags)
 {
-	vmem_t *vmem;
+	vmem_t *arena;
 	vm_offset_t addr;
 	int rv;
 
-	vmem = vm_dom[domain].vmd_kernel_arena;
+#if VM_NRESERVLEVEL > 0
+	KASSERT(vmem == kernel_arena || vmem == kernel_rwx_arena,
+	    ("kmem_malloc_domain: Only kernel_arena or kernel_rwx_arena "
+	    "are supported."));
+	if (__predict_true(vmem == kernel_arena))
+		arena = vm_dom[domain].vmd_kernel_arena;
+	else
+		arena = vm_dom[domain].vmd_kernel_rwx_arena;
+#else
+	KASSERT(vmem == kernel_arena,
+	    ("kmem_malloc_domain: Only kernel_arena is supported."));
+	arena = vm_dom[domain].vmd_kernel_arena;
+#endif
 	size = round_page(size);
-	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
+	if (vmem_alloc(arena, size, flags | M_BESTFIT, &addr))
 		return (0);
 
 	rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
 	if (rv != KERN_SUCCESS) {
-		vmem_free(vmem, addr, size);
+		vmem_free(arena, addr, size);
 		return (0);
 	}
 	return (addr);
@@ -398,12 +410,9 @@ kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
 	vm_offset_t addr;
 	int domain;
 
-	KASSERT(vmem == kernel_arena,
-	    ("kmem_malloc: Only kernel_arena is supported."));
-
 	vm_domainset_iter_malloc_init(&di, kernel_object, &domain, &flags);
 	do {
-		addr = kmem_malloc_domain(domain, size, flags);
+		addr = kmem_malloc_domain(vmem, domain, size, flags);
 		if (addr != 0)
 			break;
 	} while (vm_domainset_iter_malloc(&di, &domain, &flags) == 0);
@@ -422,6 +431,7 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
 {
 	vm_offset_t offset, i;
 	vm_page_t m, mpred;
+	vm_prot_t prot;
 	int pflags;
 
 	KASSERT(object == kernel_object,
@@ -432,6 +442,7 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
 	pflags &= ~(VM_ALLOC_NOWAIT | VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL);
 	if (flags & M_WAITOK)
 		pflags |= VM_ALLOC_WAITFAIL;
+	prot = (flags & M_EXEC) != 0 ? VM_PROT_ALL : VM_PROT_RW;
 
 	i = 0;
 	VM_OBJECT_WLOCK(object);
@@ -461,8 +472,8 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
 		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
 		    ("kmem_malloc: page %p is managed", m));
 		m->valid = VM_PAGE_BITS_ALL;
-		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
-		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
+		pmap_enter(kernel_pmap, addr + i, m, prot,
+		    prot | PMAP_ENTER_WIRED, 0);
 	}
 	VM_OBJECT_WUNLOCK(object);
 
@@ -542,13 +553,28 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
+	struct vmem *arena;
 	int domain;
 
+#if VM_NRESERVLEVEL > 0
+	KASSERT(vmem == kernel_arena || vmem == kernel_rwx_arena,
+	    ("kmem_free: Only kernel_arena or kernel_rwx_arena are supported."));
+#else
 	KASSERT(vmem == kernel_arena,
 	    ("kmem_free: Only kernel_arena is supported."));
+#endif
+
 	size = round_page(size);
 	domain = _kmem_unback(kernel_object, addr, size);
-	vmem_free(vm_dom[domain].vmd_kernel_arena, addr, size);
+#if VM_NRESERVLEVEL > 0
+	if (__predict_true(vmem == kernel_arena))
+		arena = vm_dom[domain].vmd_kernel_arena;
+	else
+		arena = vm_dom[domain].vmd_kernel_rwx_arena;
+#else
+	arena = vm_dom[domain].vmd_kernel_arena;
+#endif
+	vmem_free(arena, addr, size);
 }
 
 /*
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index 20e847f5e5af..8d49a598f26d 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -70,6 +70,7 @@ extern vm_map_t kernel_map;
 extern vm_map_t exec_map;
 extern vm_map_t pipe_map;
 extern struct vmem *kernel_arena;
+extern struct vmem *kernel_rwx_arena;
 extern struct vmem *kmem_arena;
 extern struct vmem *buffer_arena;
 extern struct vmem *transient_arena;
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index a53e0a5f6791..fb56bdf2fdfc 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -103,7 +103,8 @@ struct vm_domain {
 	struct mtx_padalign vmd_free_mtx;
 	struct mtx_padalign vmd_pageout_mtx;
 	uma_zone_t vmd_pgcache;		/* (c) page free cache. */
-	struct vmem *vmd_kernel_arena;	/* (c) per-domain kva arena. */
+	struct vmem *vmd_kernel_arena;	/* (c) per-domain kva R/W arena. */
+	struct vmem *vmd_kernel_rwx_arena; /* (c) per-domain kva R/W/X arena. */
 	u_int vmd_domain;		/* (c) Domain number. */
 	u_int vmd_page_count;		/* (c) Total page count. */
 	long vmd_segs;			/* (c) bitmask of the segments */
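
For reference, a minimal caller-side sketch of the new interface follows; it is not part of the patch. It mirrors the bpf_jit_machdep.c hunks above: executable memory is requested from malloc(9) with M_EXEC and released with an ordinary free(9), while uma_zalloc(9) callers may not pass M_EXEC, as the zone.9 note and the new KASSERT spell out. The M_JITDEMO malloc type and the jitdemo_* functions are hypothetical names used only for illustration.

/*
 * Hypothetical consumer of the new M_EXEC flag (illustration only).
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

static MALLOC_DEFINE(M_JITDEMO, "jitdemo", "demo executable allocations");

static void *
jitdemo_alloc_code(size_t size)
{

	/*
	 * M_EXEC asks for writable and executable KVA; without the flag,
	 * malloc(9) now returns non-executable memory on platforms that
	 * enforce the distinction.  With M_NOWAIT the call may fail, so
	 * the caller must check for NULL.
	 */
	return (malloc(size, M_JITDEMO, M_EXEC | M_NOWAIT));
}

static void
jitdemo_free_code(void *buf)
{

	/*
	 * An ordinary free(9); the allocator recorded (UMA_SLAB_KRWX)
	 * that the backing slab came from the executable arena.
	 */
	free(buf, M_JITDEMO);
}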