freebsd-skq/sys/kern/imgact_aout.c

344 lines
9.4 KiB
C
Raw Normal View History

/*-
* Copyright (c) 1993, David Greenman
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1995-09-08 13:24:33 +00:00
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
2003-06-11 00:56:59 +00:00
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/imgact_aout.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>
#ifdef __amd64__
#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_util.h>
#include <compat/freebsd32/freebsd32_proto.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/ia32/ia32_signal.h>
#endif
2002-03-19 21:25:46 +00:00
static int exec_aout_imgact(struct image_params *imgp);
static int aout_fixup(register_t **stack_base, struct image_params *imgp);
1995-12-02 16:32:03 +00:00
#if defined(__i386__)
struct sysentvec aout_sysvec = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
.sv_mask = 0,
.sv_sigsize = 0,
.sv_sigtbl = NULL,
.sv_errsize = 0,
.sv_errtbl = NULL,
.sv_transtrap = NULL,
.sv_fixup = aout_fixup,
.sv_sendsig = sendsig,
.sv_sigcode = sigcode,
.sv_szsigcode = &szsigcode,
.sv_prepsyscall = NULL,
.sv_name = "FreeBSD a.out",
.sv_coredump = NULL,
.sv_imgact_try = NULL,
.sv_minsigstksz = MINSIGSTKSZ,
.sv_pagesize = PAGE_SIZE,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_strings = exec_copyout_strings,
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32,
Reorganize syscall entry and leave handling. Extend struct sysvec with three new elements: sv_fetch_syscall_args - the method to fetch syscall arguments from usermode into struct syscall_args. The structure is machine-depended (this might be reconsidered after all architectures are converted). sv_set_syscall_retval - the method to set a return value for usermode from the syscall. It is a generalization of cpu_set_syscall_retval(9) to allow ABIs to override the way to set a return value. sv_syscallnames - the table of syscall names. Use sv_set_syscall_retval in kern_sigsuspend() instead of hardcoding the call to cpu_set_syscall_retval(). The new functions syscallenter(9) and syscallret(9) are provided that use sv_*syscall* pointers and contain the common repeated code from the syscall() implementations for the architecture-specific syscall trap handlers. Syscallenter() fetches arguments, calls syscall implementation from ABI sysent table, and set up return frame. The end of syscall bookkeeping is done by syscallret(). Take advantage of single place for MI syscall handling code and implement ptrace_lwpinfo pl_flags PL_FLAG_SCE, PL_FLAG_SCX and PL_FLAG_EXEC. The SCE and SCX flags notify the debugger that the thread is stopped at syscall entry or return point respectively. The EXEC flag augments SCX and notifies debugger that the process address space was changed by one of exec(2)-family syscalls. The i386, amd64, sparc64, sun4v, powerpc and ia64 syscall()s are changed to use syscallenter()/syscallret(). MIPS and arm are not converted and use the mostly unchanged syscall() implementation. Reviewed by: jhb, marcel, marius, nwhitehorn, stas Tested by: marcel (ia64), marius (sparc64), nwhitehorn (powerpc), stas (mips) MFC after: 1 month
2010-05-23 18:32:02 +00:00
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
.sv_schedtail = NULL,
};
#elif defined(__amd64__)
#define AOUT32_USRSTACK 0xbfc00000
#define AOUT32_PS_STRINGS \
(AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
#define AOUT32_MINUSER FREEBSD32_MINUSER
extern const char *freebsd32_syscallnames[];
extern u_long ia32_maxssiz;
struct sysentvec aout_sysvec = {
.sv_size = FREEBSD32_SYS_MAXSYSCALL,
.sv_table = freebsd32_sysent,
.sv_mask = 0,
.sv_sigsize = 0,
.sv_sigtbl = NULL,
.sv_errsize = 0,
.sv_errtbl = NULL,
.sv_transtrap = NULL,
.sv_fixup = aout_fixup,
.sv_sendsig = ia32_sendsig,
.sv_sigcode = ia32_sigcode,
.sv_szsigcode = &sz_ia32_sigcode,
.sv_prepsyscall = NULL,
.sv_name = "FreeBSD a.out",
.sv_coredump = NULL,
.sv_imgact_try = NULL,
.sv_minsigstksz = MINSIGSTKSZ,
.sv_pagesize = IA32_PAGE_SIZE,
.sv_minuser = AOUT32_MINUSER,
.sv_maxuser = AOUT32_USRSTACK,
.sv_usrstack = AOUT32_USRSTACK,
.sv_psstrings = AOUT32_PS_STRINGS,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_strings = freebsd32_copyout_strings,
.sv_setregs = ia32_setregs,
.sv_fixlimit = ia32_fixlimit,
.sv_maxssiz = &ia32_maxssiz,
.sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32,
.sv_set_syscall_retval = ia32_set_syscall_retval,
.sv_fetch_syscall_args = ia32_fetch_syscall_args,
.sv_syscallnames = freebsd32_syscallnames,
};
#else
#error "Port me"
#endif
static int
aout_fixup(register_t **stack_base, struct image_params *imgp)
{
*(char **)stack_base -= sizeof(uint32_t);
return (suword32(*stack_base, imgp->args->argc));
}
static int
exec_aout_imgact(struct image_params *imgp)
{
const struct exec *a_out = (const struct exec *) imgp->image_header;
struct vmspace *vmspace;
vm_map_t map;
vm_object_t object;
vm_offset_t text_end, data_end;
unsigned long virtual_offset;
unsigned long file_offset;
unsigned long bss_size;
int error;
/*
* Linux and *BSD binaries look very much alike,
1995-05-30 08:16:23 +00:00
* only the machine id is different:
1995-06-11 19:33:05 +00:00
* 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI.
* NetBSD is in network byte order.. ugh.
*/
if (((a_out->a_midmag >> 16) & 0xff) != 0x86 &&
((a_out->a_midmag >> 16) & 0xff) != 0 &&
((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86)
1995-06-11 19:33:05 +00:00
return -1;
/*
* Set file/virtual offset based on a.out variant.
* We do two cases: host byte order and network byte order
* (for NetBSD compatibility)
*/
switch ((int)(a_out->a_midmag & 0xffff)) {
case ZMAGIC:
virtual_offset = 0;
if (a_out->a_text) {
file_offset = PAGE_SIZE;
} else {
/* Bill's "screwball mode" */
file_offset = 0;
}
break;
case QMAGIC:
virtual_offset = PAGE_SIZE;
file_offset = 0;
/* Pass PS_STRINGS for BSD/OS binaries only. */
if (N_GETMID(*a_out) == MID_ZERO)
imgp->ps_strings = aout_sysvec.sv_psstrings;
break;
default:
/* NetBSD compatibility */
switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) {
case ZMAGIC:
case QMAGIC:
virtual_offset = PAGE_SIZE;
file_offset = 0;
break;
default:
return (-1);
}
}
bss_size = roundup(a_out->a_bss, PAGE_SIZE);
/*
* Check various fields in header for validity/bounds.
*/
if (/* entry point must lay with text region */
a_out->a_entry < virtual_offset ||
a_out->a_entry >= virtual_offset + a_out->a_text ||
/* text and data size must each be page rounded */
a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK
#ifdef __amd64__
||
/* overflows */
virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX
#endif
)
return (-1);
/* text + data can't exceed file size */
if (a_out->a_data + a_out->a_text > imgp->attr->va_size)
return (EFAULT);
/*
* text/data/bss must not exceed limits
*/
Locking for the per-process resource limits structure. - struct plimit includes a mutex to protect a reference count. The plimit structure is treated similarly to struct ucred in that is is always copy on write, so having a reference to a structure is sufficient to read from it without needing a further lock. - The proc lock protects the p_limit pointer and must be held while reading limits from a process to keep the limit structure from changing out from under you while reading from it. - Various global limits that are ints are not protected by a lock since int writes are atomic on all the archs we support and thus a lock wouldn't buy us anything. - All accesses to individual resource limits from a process are abstracted behind a simple lim_rlimit(), lim_max(), and lim_cur() API that return either an rlimit, or the current or max individual limit of the specified resource from a process. - dosetrlimit() was renamed to kern_setrlimit() to match existing style of other similar syscall helper functions. - The alpha OSF/1 compat layer no longer calls getrlimit() and setrlimit() (it didn't used the stackgap when it should have) but uses lim_rlimit() and kern_setrlimit() instead. - The svr4 compat no longer uses the stackgap for resource limits calls, but uses lim_rlimit() and kern_setrlimit() instead. - The ibcs2 compat no longer uses the stackgap for resource limits. It also no longer uses the stackgap for accessing sysctl's for the ibcs2_sysconf() syscall but uses kernel_sysctl() instead. As a result, ibcs2_sysconf() no longer needs Giant. - The p_rlimit macro no longer exists. Submitted by: mtm (mostly, I only did a few cleanups and catchups) Tested on: i386 Compiled on: alpha, amd64
2004-02-04 21:52:57 +00:00
PROC_LOCK(imgp->proc);
if (/* text can't exceed maximum text size */
a_out->a_text > maxtsiz ||
/* data + bss can't exceed rlimit */
a_out->a_data + bss_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) {
Locking for the per-process resource limits structure. - struct plimit includes a mutex to protect a reference count. The plimit structure is treated similarly to struct ucred in that is is always copy on write, so having a reference to a structure is sufficient to read from it without needing a further lock. - The proc lock protects the p_limit pointer and must be held while reading limits from a process to keep the limit structure from changing out from under you while reading from it. - Various global limits that are ints are not protected by a lock since int writes are atomic on all the archs we support and thus a lock wouldn't buy us anything. - All accesses to individual resource limits from a process are abstracted behind a simple lim_rlimit(), lim_max(), and lim_cur() API that return either an rlimit, or the current or max individual limit of the specified resource from a process. - dosetrlimit() was renamed to kern_setrlimit() to match existing style of other similar syscall helper functions. - The alpha OSF/1 compat layer no longer calls getrlimit() and setrlimit() (it didn't used the stackgap when it should have) but uses lim_rlimit() and kern_setrlimit() instead. - The svr4 compat no longer uses the stackgap for resource limits calls, but uses lim_rlimit() and kern_setrlimit() instead. - The ibcs2 compat no longer uses the stackgap for resource limits. It also no longer uses the stackgap for accessing sysctl's for the ibcs2_sysconf() syscall but uses kernel_sysctl() instead. As a result, ibcs2_sysconf() no longer needs Giant. - The p_rlimit macro no longer exists. Submitted by: mtm (mostly, I only did a few cleanups and catchups) Tested on: i386 Compiled on: alpha, amd64
2004-02-04 21:52:57 +00:00
PROC_UNLOCK(imgp->proc);
return (ENOMEM);
Locking for the per-process resource limits structure. - struct plimit includes a mutex to protect a reference count. The plimit structure is treated similarly to struct ucred in that is is always copy on write, so having a reference to a structure is sufficient to read from it without needing a further lock. - The proc lock protects the p_limit pointer and must be held while reading limits from a process to keep the limit structure from changing out from under you while reading from it. - Various global limits that are ints are not protected by a lock since int writes are atomic on all the archs we support and thus a lock wouldn't buy us anything. - All accesses to individual resource limits from a process are abstracted behind a simple lim_rlimit(), lim_max(), and lim_cur() API that return either an rlimit, or the current or max individual limit of the specified resource from a process. - dosetrlimit() was renamed to kern_setrlimit() to match existing style of other similar syscall helper functions. - The alpha OSF/1 compat layer no longer calls getrlimit() and setrlimit() (it didn't used the stackgap when it should have) but uses lim_rlimit() and kern_setrlimit() instead. - The svr4 compat no longer uses the stackgap for resource limits calls, but uses lim_rlimit() and kern_setrlimit() instead. - The ibcs2 compat no longer uses the stackgap for resource limits. It also no longer uses the stackgap for accessing sysctl's for the ibcs2_sysconf() syscall but uses kernel_sysctl() instead. As a result, ibcs2_sysconf() no longer needs Giant. - The p_rlimit macro no longer exists. Submitted by: mtm (mostly, I only did a few cleanups and catchups) Tested on: i386 Compiled on: alpha, amd64
2004-02-04 21:52:57 +00:00
}
PROC_UNLOCK(imgp->proc);
/*
* Avoid a possible deadlock if the current address space is destroyed
* and that address space maps the locked vnode. In the common case,
* the locked vnode's v_usecount is decremented but remains greater
* than zero. Consequently, the vnode lock is not needed by vrele().
* However, in cases where the vnode lock is external, such as nullfs,
* v_usecount may become zero.
*/
VOP_UNLOCK(imgp->vp, 0);
/*
* Destroy old process VM and create a new one (with a new stack)
*/
error = exec_new_vmspace(imgp, &aout_sysvec);
vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
if (error)
return (error);
/*
* The vm space can be changed by exec_new_vmspace
*/
vmspace = imgp->proc->p_vmspace;
object = imgp->object;
map = &vmspace->vm_map;
vm_map_lock(map);
vm_object_reference(object);
text_end = virtual_offset + a_out->a_text;
error = vm_map_insert(map, object,
file_offset,
virtual_offset, text_end,
VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
data_end = text_end + a_out->a_data;
if (a_out->a_data) {
vm_object_reference(object);
error = vm_map_insert(map, object,
file_offset + a_out->a_text,
text_end, data_end,
VM_PROT_ALL, VM_PROT_ALL,
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
vm_object_deallocate(object);
return (error);
}
}
if (bss_size) {
error = vm_map_insert(map, NULL, 0,
data_end, data_end + bss_size,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_map_unlock(map);
return (error);
}
}
vm_map_unlock(map);
/* Fill in process VM information */
vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT;
vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset;
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
1995-05-30 08:16:23 +00:00
imgp->proc->p_sysent = &aout_sysvec;
return (0);
}
/*
* Tell kern_execve.c about it, with a little help from the linker.
*/
static struct execsw aout_execsw = { exec_aout_imgact, "a.out" };
EXEC_SET(aout, aout_execsw);