exec: Reimplement stack address randomization
The approach taken by the stack gap implementation was to insert a
random gap between the top of the fixed stack mapping and the true top
of the main process stack. This approach was chosen so as to avoid
randomizing the previously fixed address of certain process metadata
stored at the top of the stack, but had some shortcomings. In
particular, mlockall(2) calls would wire the gap, bloating the process's
memory usage, and RLIMIT_STACK included the size of the gap, so small
limits (less than several MB) could not be used.
There is little value in storing each process's ps_strings at a fixed
location, as only very old programs hard-code this address; consumers
were converted decades ago to use a sysctl-based interface for this
purpose. Thus, this change re-implements stack address randomization by
simply breaking the convention of storing ps_strings at a fixed
location, and randomizing the location of the entire stack mapping.
This implementation is simpler and avoids the problems mentioned above,
while being unlikely to break compatibility anywhere the default ASLR
settings are used.
The kern.elfN.aslr.stack_gap sysctl is renamed to kern.elfN.aslr.stack,
and is re-enabled by default.
PR: 260303
Reviewed by: kib
Discussed with: emaste, mw
Sponsored by: The FreeBSD Foundation
(cherry picked from commit 1811c1e957)
This commit is contained in:
parent
e3b852f99b
commit
5fa005e915
@ -28,7 +28,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd February 28, 2021
|
||||
.Dd January 14, 2022
|
||||
.Dt SECURITY 7
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -1062,19 +1062,19 @@ position-independent (PIE) 32bit binaries.
|
||||
.It Dv kern.elf32.aslr.honor_sbrk
|
||||
Makes ASLR less aggressive and more compatible with old binaries
|
||||
relying on the sbrk area.
|
||||
.It Dv kern.elf32.aslr.stack_gap
|
||||
If ASLR is enabled for a binary, a non-zero value creates a randomized
|
||||
stack gap between strings and the end of the aux vector.
|
||||
The value is the maximum percentage of main stack to waste on the gap.
|
||||
Cannot be greater than 50, i.e., at most half of the stack.
|
||||
.It Dv kern.elf32.aslr.stack
|
||||
If ASLR is enabled for a binary, a non-zero value enables randomization
|
||||
of the stack.
|
||||
Otherwise, the stack is mapped at a fixed location determined by the
|
||||
process ABI.
|
||||
.It Dv kern.elf64.aslr.enable
|
||||
64bit binaries ASLR control.
|
||||
.It Dv kern.elf64.aslr.pie_enable
|
||||
64bit PIE binaries ASLR control.
|
||||
.It Dv kern.elf64.aslr.honor_sbrk
|
||||
64bit binaries ASLR sbrk compatibility control.
|
||||
.It Dv kern.elf64.aslr.stack_gap
|
||||
Controls stack gap for 64bit binaries.
|
||||
.It Dv kern.elf64.aslr.stack
|
||||
Controls stack address randomization for 64bit binaries.
|
||||
.It Dv kern.elf32.nxstack
|
||||
Enables non-executable stack for 32bit processes.
|
||||
Enabled by default if supported by hardware and corresponding binary.
|
||||
|
@ -213,6 +213,10 @@ exec_linux_imgact(struct image_params *imgp)
|
||||
vmspace->vm_daddr =
|
||||
(caddr_t)(void *)(uintptr_t)(virtual_offset + a_out->a_text);
|
||||
|
||||
error = exec_map_stack(imgp);
|
||||
if (error != 0)
|
||||
goto fail;
|
||||
|
||||
/* Fill in image_params */
|
||||
imgp->interpreted = 0;
|
||||
imgp->entry_addr = a_out->a_entry;
|
||||
|
@ -348,6 +348,10 @@ exec_aout_imgact(struct image_params *imgp)
|
||||
vmspace->vm_daddr = (caddr_t) (uintptr_t)
|
||||
(virtual_offset + a_out->a_text);
|
||||
|
||||
error = exec_map_stack(imgp);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
/* Fill in image_params */
|
||||
imgp->interpreted = 0;
|
||||
imgp->entry_addr = a_out->a_entry;
|
||||
|
@ -188,11 +188,11 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
|
||||
&__elfN(aslr_honor_sbrk), 0,
|
||||
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");
|
||||
|
||||
static int __elfN(aslr_stack_gap) = 3;
|
||||
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW,
|
||||
&__elfN(aslr_stack_gap), 0,
|
||||
static int __elfN(aslr_stack) = 1;
|
||||
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
|
||||
&__elfN(aslr_stack), 0,
|
||||
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
|
||||
": maximum percentage of main stack to waste on a random gap");
|
||||
": enable stack address randomization");
|
||||
|
||||
static int __elfN(sigfastblock) = 1;
|
||||
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
|
||||
@ -1290,6 +1290,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
|
||||
if (!__elfN(aslr_honor_sbrk) ||
|
||||
(imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
|
||||
imgp->map_flags |= MAP_ASLR_IGNSTART;
|
||||
if (__elfN(aslr_stack))
|
||||
imgp->map_flags |= MAP_ASLR_STACK;
|
||||
}
|
||||
|
||||
if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 &&
|
||||
@ -1298,13 +1300,15 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
|
||||
imgp->map_flags |= MAP_WXORX;
|
||||
|
||||
error = exec_new_vmspace(imgp, sv);
|
||||
vmspace = imgp->proc->p_vmspace;
|
||||
map = &vmspace->vm_map;
|
||||
|
||||
imgp->proc->p_sysent = sv;
|
||||
|
||||
maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
|
||||
if (mapsz >= maxv - vm_map_min(map)) {
|
||||
vmspace = imgp->proc->p_vmspace;
|
||||
map = &vmspace->vm_map;
|
||||
maxv = sv->sv_usrstack;
|
||||
if ((imgp->map_flags & MAP_ASLR_STACK) == 0)
|
||||
maxv -= lim_max(td, RLIMIT_STACK);
|
||||
if (error == 0 && mapsz >= maxv - vm_map_min(map)) {
|
||||
uprintf("Excessive mapping size\n");
|
||||
error = ENOEXEC;
|
||||
}
|
||||
@ -1330,8 +1334,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
|
||||
if (error != 0)
|
||||
goto ret;
|
||||
|
||||
entry = (u_long)hdr->e_entry + et_dyn_addr;
|
||||
|
||||
/*
|
||||
* We load the dynamic linker where a userland call
|
||||
* to mmap(0, ...) would put it. The rationale behind this
|
||||
@ -1352,6 +1354,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
|
||||
map->anon_loc = addr;
|
||||
}
|
||||
|
||||
entry = (u_long)hdr->e_entry + et_dyn_addr;
|
||||
imgp->entry_addr = entry;
|
||||
|
||||
if (interp != NULL) {
|
||||
@ -1372,6 +1375,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
|
||||
} else
|
||||
addr = et_dyn_addr;
|
||||
|
||||
error = exec_map_stack(imgp);
|
||||
if (error != 0)
|
||||
goto ret;
|
||||
|
||||
/*
|
||||
* Construct auxargs table (used by the copyout_auxargs routine)
|
||||
*/
|
||||
|
@ -178,19 +178,19 @@ static int
|
||||
sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
struct proc *p;
|
||||
int error;
|
||||
vm_offset_t val;
|
||||
|
||||
p = curproc;
|
||||
#ifdef SCTL_MASK32
|
||||
if (req->flags & SCTL_MASK32) {
|
||||
unsigned int val;
|
||||
val = (unsigned int)p->p_sysent->sv_usrstack;
|
||||
error = SYSCTL_OUT(req, &val, sizeof(val));
|
||||
} else
|
||||
unsigned int val32;
|
||||
|
||||
val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop);
|
||||
return (SYSCTL_OUT(req, &val32, sizeof(val32)));
|
||||
}
|
||||
#endif
|
||||
error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
|
||||
sizeof(p->p_sysent->sv_usrstack));
|
||||
return error;
|
||||
val = round_page(p->p_vmspace->vm_stacktop);
|
||||
return (SYSCTL_OUT(req, &val, sizeof(val)));
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1119,9 +1119,8 @@ exec_free_abi_mappings(struct proc *p)
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy old address space, and allocate a new stack.
|
||||
* The new stack is only sgrowsiz large because it is grown
|
||||
* automatically on a page fault.
|
||||
* Run down the current address space and install a new one. Map the shared
|
||||
* page.
|
||||
*/
|
||||
int
|
||||
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
@ -1131,11 +1130,8 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
struct vmspace *vmspace = p->p_vmspace;
|
||||
struct thread *td = curthread;
|
||||
vm_object_t obj;
|
||||
struct rlimit rlim_stack;
|
||||
vm_offset_t sv_minuser, stack_addr;
|
||||
vm_offset_t sv_minuser;
|
||||
vm_map_t map;
|
||||
vm_prot_t stack_prot;
|
||||
u_long ssiz;
|
||||
|
||||
imgp->vmspace_destroyed = true;
|
||||
imgp->sysent = sv;
|
||||
@ -1172,7 +1168,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
*/
|
||||
vm_map_lock(map);
|
||||
vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
|
||||
MAP_ASLR_IGNSTART | MAP_WXORX);
|
||||
MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX);
|
||||
vm_map_unlock(map);
|
||||
} else {
|
||||
error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
|
||||
@ -1198,7 +1194,28 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate a new stack */
|
||||
return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the stack size limit and map the main process stack.
|
||||
*/
|
||||
int
|
||||
exec_map_stack(struct image_params *imgp)
|
||||
{
|
||||
struct rlimit rlim_stack;
|
||||
struct sysentvec *sv;
|
||||
struct proc *p;
|
||||
vm_map_t map;
|
||||
struct vmspace *vmspace;
|
||||
vm_offset_t stack_addr, stack_top;
|
||||
u_long ssiz;
|
||||
int error, find_space, stack_off;
|
||||
vm_prot_t stack_prot;
|
||||
|
||||
p = imgp->proc;
|
||||
sv = p->p_sysent;
|
||||
|
||||
if (imgp->stack_sz != 0) {
|
||||
ssiz = trunc_page(imgp->stack_sz);
|
||||
PROC_LOCK(p);
|
||||
@ -1215,25 +1232,44 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
|
||||
} else {
|
||||
ssiz = maxssiz;
|
||||
}
|
||||
stack_addr = sv->sv_usrstack - ssiz;
|
||||
stack_prot = obj != NULL && imgp->stack_prot != 0 ?
|
||||
|
||||
vmspace = p->p_vmspace;
|
||||
map = &vmspace->vm_map;
|
||||
|
||||
stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ?
|
||||
imgp->stack_prot : sv->sv_stackprot;
|
||||
error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, stack_prot,
|
||||
VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
|
||||
if ((map->flags & MAP_ASLR_STACK) != 0) {
|
||||
stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
|
||||
lim_max(curthread, RLIMIT_DATA));
|
||||
find_space = VMFS_ANY_SPACE;
|
||||
} else {
|
||||
stack_addr = sv->sv_usrstack - ssiz;
|
||||
find_space = VMFS_NO_SPACE;
|
||||
}
|
||||
error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz,
|
||||
sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL,
|
||||
MAP_STACK_GROWS_DOWN);
|
||||
if (error != KERN_SUCCESS) {
|
||||
uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x "
|
||||
"failed mach error %d errno %d\n", (uintmax_t)ssiz,
|
||||
"failed, mach error %d errno %d\n", (uintmax_t)ssiz,
|
||||
stack_prot, error, vm_mmap_to_errno(error));
|
||||
return (vm_mmap_to_errno(error));
|
||||
}
|
||||
vmspace->vm_stkgap = 0;
|
||||
|
||||
stack_top = stack_addr + ssiz;
|
||||
if ((map->flags & MAP_ASLR_STACK) != 0) {
|
||||
/* Randomize within the first page of the stack. */
|
||||
arc4rand(&stack_off, sizeof(stack_off), 0);
|
||||
stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
|
||||
}
|
||||
|
||||
/*
|
||||
* vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
|
||||
* are still used to enforce the stack rlimit on the process stack.
|
||||
*/
|
||||
vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
|
||||
vmspace->vm_maxsaddr = (char *)stack_addr;
|
||||
vmspace->vm_stacktop = stack_top;
|
||||
vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -77,7 +77,8 @@ struct execsw {
|
||||
* Prefer the kern.ps_strings or kern.proc.ps_strings sysctls to this constant.
|
||||
*/
|
||||
#define PS_STRINGS (USRSTACK - sizeof(struct ps_strings))
|
||||
#define PROC_PS_STRINGS(p) ((p)->p_sysent->sv_psstrings)
|
||||
#define PROC_PS_STRINGS(p) \
|
||||
((p)->p_vmspace->vm_stacktop - (p)->p_sysent->sv_psstringssz)
|
||||
|
||||
int exec_map_first_page(struct image_params *);
|
||||
void exec_unmap_first_page(struct image_params *);
|
||||
|
@ -114,6 +114,7 @@ int exec_check_permissions(struct image_params *);
|
||||
void exec_cleanup(struct thread *td, struct vmspace *);
|
||||
int exec_copyout_strings(struct image_params *, uintptr_t *);
|
||||
void exec_free_args(struct image_args *);
|
||||
int exec_map_stack(struct image_params *);
|
||||
int exec_new_vmspace(struct image_params *, struct sysentvec *);
|
||||
void exec_setregs(struct thread *, struct image_params *, uintptr_t);
|
||||
int exec_shell_imgact(struct image_params *);
|
||||
|
@ -343,7 +343,6 @@ vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
|
||||
vm->vm_taddr = 0;
|
||||
vm->vm_daddr = 0;
|
||||
vm->vm_maxsaddr = 0;
|
||||
vm->vm_stkgap = 0;
|
||||
return (vm);
|
||||
}
|
||||
|
||||
@ -4266,7 +4265,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
vm2->vm_taddr = vm1->vm_taddr;
|
||||
vm2->vm_daddr = vm1->vm_daddr;
|
||||
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
|
||||
vm2->vm_stkgap = vm1->vm_stkgap;
|
||||
vm_map_lock(old_map);
|
||||
if (old_map->busy)
|
||||
vm_map_wait_busy(old_map);
|
||||
@ -4285,7 +4283,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
|
||||
|
||||
new_map->anon_loc = old_map->anon_loc;
|
||||
new_map->flags |= old_map->flags & (MAP_ASLR | MAP_ASLR_IGNSTART |
|
||||
MAP_WXORX);
|
||||
MAP_ASLR_STACK | MAP_WXORX);
|
||||
|
||||
VM_MAP_ENTRY_FOREACH(old_entry, old_map) {
|
||||
if ((old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
|
||||
|
@ -223,12 +223,13 @@ struct vm_map {
|
||||
* vm_flags_t values
|
||||
*/
|
||||
#define MAP_WIREFUTURE 0x01 /* wire all future pages */
|
||||
#define MAP_BUSY_WAKEUP 0x02
|
||||
#define MAP_BUSY_WAKEUP 0x02 /* thread(s) waiting on busy state */
|
||||
#define MAP_IS_SUB_MAP 0x04 /* has parent */
|
||||
#define MAP_ASLR 0x08 /* enabled ASLR */
|
||||
#define MAP_ASLR_IGNSTART 0x10
|
||||
#define MAP_REPLENISH 0x20
|
||||
#define MAP_ASLR_IGNSTART 0x10 /* ASLR ignores data segment */
|
||||
#define MAP_REPLENISH 0x20 /* kmapent zone needs to be refilled */
|
||||
#define MAP_WXORX 0x40 /* enforce W^X */
|
||||
#define MAP_ASLR_STACK 0x80 /* stack location is randomized */
|
||||
|
||||
#ifdef _KERNEL
|
||||
#if defined(KLD_MODULE) && !defined(KLD_TIED)
|
||||
@ -293,7 +294,7 @@ struct vmspace {
|
||||
caddr_t vm_taddr; /* (c) user virtual address of text */
|
||||
caddr_t vm_daddr; /* (c) user virtual address of data */
|
||||
caddr_t vm_maxsaddr; /* user VA at max stack growth */
|
||||
vm_size_t vm_stkgap; /* stack gap size in bytes */
|
||||
vm_offset_t vm_stacktop; /* top of the stack, may not be page-aligned */
|
||||
u_int vm_refcnt; /* number of references */
|
||||
/*
|
||||
* Keep the PMAP last, so that CPU-specific variations of that
|
||||
|
Loading…
Reference in New Issue
Block a user