exec: Reimplement stack address randomization

The approach taken by the stack gap implementation was to insert a
random gap between the top of the fixed stack mapping and the true top
of the main process stack.  This approach was chosen so as to avoid
randomizing the previously fixed address of certain process metadata
stored at the top of the stack, but had some shortcomings.  In
particular, mlockall(2) calls would wire the gap, bloating the process'
memory usage, and RLIMIT_STACK included the size of the gap, so small
limits (less than several MB) could not be used.

There is little value in storing each process' ps_strings at a fixed
location, as only very old programs hard-code this address; consumers
were converted decades ago to use a sysctl-based interface for this
purpose.  Thus, this change re-implements stack address randomization by
simply breaking the convention of storing ps_strings at a fixed
location, and randomizing the location of the entire stack mapping.
This implementation is simpler and avoids the problems mentioned above,
while being unlikely to break compatibility anywhere the default ASLR
settings are used.

The kern.elfN.aslr.stack_gap sysctl is renamed to kern.elfN.aslr.stack,
and is re-enabled by default.

PR:		260303
Reviewed by:	kib
Discussed with:	emaste, mw
Sponsored by:	The FreeBSD Foundation

(cherry picked from commit 1811c1e957)
This commit is contained in:
Mark Johnston 2022-01-17 11:42:56 -05:00
parent e3b852f99b
commit 5fa005e915
9 changed files with 102 additions and 50 deletions

View File

@ -28,7 +28,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd February 28, 2021
.Dd January 14, 2022
.Dt SECURITY 7
.Os
.Sh NAME
@ -1062,19 +1062,19 @@ position-independent (PIE) 32bit binaries.
.It Dv kern.elf32.aslr.honor_sbrk
Makes ASLR less aggressive and more compatible with old binaries
relying on the sbrk area.
.It Dv kern.elf32.aslr.stack_gap
If ASLR is enabled for a binary, a non-zero value creates a randomized
stack gap between strings and the end of the aux vector.
The value is the maximum percentage of main stack to waste on the gap.
Cannot be greater than 50, i.e., at most half of the stack.
.It Dv kern.elf32.aslr.stack
If ASLR is enabled for a binary, a non-zero value enables randomization
of the stack.
Otherwise, the stack is mapped at a fixed location determined by the
process ABI.
.It Dv kern.elf64.aslr.enable
64bit binaries ASLR control.
.It Dv kern.elf64.aslr.pie_enable
64bit PIE binaries ASLR control.
.It Dv kern.elf64.aslr.honor_sbrk
64bit binaries ASLR sbrk compatibility control.
.It Dv kern.elf64.aslr.stack_gap
Controls stack gap for 64bit binaries.
.It Dv kern.elf64.aslr.stack
Controls stack address randomization for 64bit binaries.
.It Dv kern.elf32.nxstack
Enables non-executable stack for 32bit processes.
Enabled by default if supported by hardware and corresponding binary.

View File

@ -213,6 +213,10 @@ exec_linux_imgact(struct image_params *imgp)
vmspace->vm_daddr =
(caddr_t)(void *)(uintptr_t)(virtual_offset + a_out->a_text);
error = exec_map_stack(imgp);
if (error != 0)
goto fail;
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;

View File

@ -348,6 +348,10 @@ exec_aout_imgact(struct image_params *imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
error = exec_map_stack(imgp);
if (error != 0)
return (error);
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;

View File

@ -188,11 +188,11 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
&__elfN(aslr_honor_sbrk), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
static int __elfN(aslr_stack_gap) = 3;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW,
&__elfN(aslr_stack_gap), 0,
static int __elfN(aslr_stack) = 1;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
&__elfN(aslr_stack), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
": maximum percentage of main stack to waste on a random gap");
": enable stack address randomization");
static int __elfN(sigfastblock) = 1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
@ -1290,6 +1290,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (!__elfN(aslr_honor_sbrk) ||
(imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
imgp->map_flags |= MAP_ASLR_IGNSTART;
if (__elfN(aslr_stack))
imgp->map_flags |= MAP_ASLR_STACK;
}
if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 &&
@ -1298,13 +1300,15 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
imgp->map_flags |= MAP_WXORX;
error = exec_new_vmspace(imgp, sv);
vmspace = imgp->proc->p_vmspace;
map = &vmspace->vm_map;
imgp->proc->p_sysent = sv;
maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
if (mapsz >= maxv - vm_map_min(map)) {
vmspace = imgp->proc->p_vmspace;
map = &vmspace->vm_map;
maxv = sv->sv_usrstack;
if ((imgp->map_flags & MAP_ASLR_STACK) == 0)
maxv -= lim_max(td, RLIMIT_STACK);
if (error == 0 && mapsz >= maxv - vm_map_min(map)) {
uprintf("Excessive mapping size\n");
error = ENOEXEC;
}
@ -1330,8 +1334,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if (error != 0)
goto ret;
entry = (u_long)hdr->e_entry + et_dyn_addr;
/*
* We load the dynamic linker where a userland call
* to mmap(0, ...) would put it. The rationale behind this
@ -1352,6 +1354,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
map->anon_loc = addr;
}
entry = (u_long)hdr->e_entry + et_dyn_addr;
imgp->entry_addr = entry;
if (interp != NULL) {
@ -1372,6 +1375,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
} else
addr = et_dyn_addr;
error = exec_map_stack(imgp);
if (error != 0)
goto ret;
/*
* Construct auxargs table (used by the copyout_auxargs routine)
*/

View File

@ -178,19 +178,19 @@ static int
sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
{
struct proc *p;
int error;
vm_offset_t val;
p = curproc;
#ifdef SCTL_MASK32
if (req->flags & SCTL_MASK32) {
unsigned int val;
val = (unsigned int)p->p_sysent->sv_usrstack;
error = SYSCTL_OUT(req, &val, sizeof(val));
} else
unsigned int val32;
val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop);
return (SYSCTL_OUT(req, &val32, sizeof(val32)));
}
#endif
error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
sizeof(p->p_sysent->sv_usrstack));
return error;
val = round_page(p->p_vmspace->vm_stacktop);
return (SYSCTL_OUT(req, &val, sizeof(val)));
}
static int
@ -1119,9 +1119,8 @@ exec_free_abi_mappings(struct proc *p)
}
/*
* Destroy old address space, and allocate a new stack.
* The new stack is only sgrowsiz large because it is grown
* automatically on a page fault.
* Run down the current address space and install a new one. Map the shared
* page.
*/
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
@ -1131,11 +1130,8 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
struct vmspace *vmspace = p->p_vmspace;
struct thread *td = curthread;
vm_object_t obj;
struct rlimit rlim_stack;
vm_offset_t sv_minuser, stack_addr;
vm_offset_t sv_minuser;
vm_map_t map;
vm_prot_t stack_prot;
u_long ssiz;
imgp->vmspace_destroyed = true;
imgp->sysent = sv;
@ -1172,7 +1168,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
*/
vm_map_lock(map);
vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
MAP_ASLR_IGNSTART | MAP_WXORX);
MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX);
vm_map_unlock(map);
} else {
error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
@ -1198,7 +1194,28 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
}
}
/* Allocate a new stack */
return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
}
/*
* Compute the stack size limit and map the main process stack.
*/
int
exec_map_stack(struct image_params *imgp)
{
struct rlimit rlim_stack;
struct sysentvec *sv;
struct proc *p;
vm_map_t map;
struct vmspace *vmspace;
vm_offset_t stack_addr, stack_top;
u_long ssiz;
int error, find_space, stack_off;
vm_prot_t stack_prot;
p = imgp->proc;
sv = p->p_sysent;
if (imgp->stack_sz != 0) {
ssiz = trunc_page(imgp->stack_sz);
PROC_LOCK(p);
@ -1215,25 +1232,44 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
} else {
ssiz = maxssiz;
}
stack_addr = sv->sv_usrstack - ssiz;
stack_prot = obj != NULL && imgp->stack_prot != 0 ?
vmspace = p->p_vmspace;
map = &vmspace->vm_map;
stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ?
imgp->stack_prot : sv->sv_stackprot;
error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot,
VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
if ((map->flags & MAP_ASLR_STACK) != 0) {
stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
lim_max(curthread, RLIMIT_DATA));
find_space = VMFS_ANY_SPACE;
} else {
stack_addr = sv->sv_usrstack - ssiz;
find_space = VMFS_NO_SPACE;
}
error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz,
sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL,
MAP_STACK_GROWS_DOWN);
if (error != KERN_SUCCESS) {
uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x "
"failed mach error %d errno %d\n", (uintmax_t)ssiz,
"failed, mach error %d errno %d\n", (uintmax_t)ssiz,
stack_prot, error, vm_mmap_to_errno(error));
return (vm_mmap_to_errno(error));
}
vmspace->vm_stkgap = 0;
stack_top = stack_addr + ssiz;
if ((map->flags & MAP_ASLR_STACK) != 0) {
/* Randomize within the first page of the stack. */
arc4rand(&stack_off, sizeof(stack_off), 0);
stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
}
/*
* vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
* are still used to enforce the stack rlimit on the process stack.
*/
vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
vmspace->vm_maxsaddr = (char *)stack_addr;
vmspace->vm_stacktop = stack_top;
vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
return (0);
}

View File

@ -77,7 +77,8 @@ struct execsw {
* Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant.
*/
#define PS_STRINGS (USRSTACK - sizeof(struct ps_strings))
#define PROC_PS_STRINGS(p) ((p)->p_sysent->sv_psstrings)
#define PROC_PS_STRINGS(p) \
((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz)
int exec_map_first_page(struct image_params *);
void exec_unmap_first_page(struct image_params *);

View File

@ -114,6 +114,7 @@ int exec_check_permissions(struct image_params *);
void exec_cleanup(struct thread *td, struct vmspace *);
int exec_copyout_strings(struct image_params *, uintptr_t *);
void exec_free_args(struct image_args *);
int exec_map_stack(struct image_params *);
int exec_new_vmspace(struct image_params *, struct sysentvec *);
void exec_setregs(struct thread *, struct image_params *, uintptr_t);
int exec_shell_imgact(struct image_params *);

View File

@ -343,7 +343,6 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
vm->vm_taddr = 0;
vm->vm_daddr = 0;
vm->vm_maxsaddr = 0;
vm->vm_stkgap = 0;
return (vm);
}
@ -4266,7 +4265,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vm2->vm_taddr = vm1->vm_taddr;
vm2->vm_daddr = vm1->vm_daddr;
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
vm2->vm_stkgap = vm1->vm_stkgap;
vm_map_lock(old_map);
if (old_map->busy)
vm_map_wait_busy(old_map);
@ -4285,7 +4283,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
new_map->anon_loc = old_map->anon_loc;
new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART |
MAP_WXORX);
MAP_ASLR_STACK | MAP_WXORX);
VM_MAP_ENTRY_FOREACH(old_entry, old_map) {
if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)

View File

@ -223,12 +223,13 @@ struct vm_map {
* vm_flags_t values
*/
#define MAP_WIREFUTURE 0x01 /* wire all future pages */
#define MAP_BUSY_WAKEUP 0x02
#define MAP_BUSY_WAKEUP 0x02 /* thread(s) waiting on busy state */
#define MAP_IS_SUB_MAP 0x04 /* has parent */
#define MAP_ASLR 0x08 /* enabled ASLR */
#define MAP_ASLR_IGNSTART 0x10
#define MAP_REPLENISH 0x20
#define MAP_ASLR_IGNSTART 0x10 /* ASLR ignores data segment */
#define MAP_REPLENISH 0x20 /* kmapent zone needs to be refilled */
#define MAP_WXORX 0x40 /* enforce W^X */
#define MAP_ASLR_STACK 0x80 /* stack location is randomized */
#ifdef _KERNEL
#if defined(KLD_MODULE) && !defined(KLD_TIED)
@ -293,7 +294,7 @@ struct vmspace {
caddr_t vm_taddr; /* (c) user virtual address of text */
caddr_t vm_daddr; /* (c) user virtual address of data */
caddr_t vm_maxsaddr; /* user VA at max stack growth */
vm_size_t vm_stkgap; /* stack gap size in bytes */
vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */
u_int vm_refcnt; /* number of references */
/*
* Keep the PMAP last, so that CPU-specific variations of that