freebsd-skq/sys/i386/linux/linux_sysvec.c
Peter Wemm d66a506616 Mega-commit for Linux emulator update.. This has been stress tested under
netscape-2.0 for Linux running all the Java stuff.  The scrollbars are now
working, at least on my machine. (whew! :-)

I'm uncomfortable with the size of this commit, but it's too
inter-dependant to easily seperate out.

The main changes:

COMPAT_LINUX is *GONE*.  Most of the code has been moved out of the i386
machine dependent section into the linux emulator itself.  The int 0x80
syscall code was almost identical to the lcall 7,0 code and a minor tweak
allows them to both be used with the same C code.  All kernels can now
just modload the lkm and it'll DTRT without having to rebuild the kernel
first.  Like IBCS2, you can statically compile it in with "options LINUX".

A pile of new syscalls implemented, including getdents(), llseek(),
readv(), writev(), msync(), personality().  The Linux-ELF libraries want
to use some of these.

linux_select() now obeys Linux semantics, ie: returns the time remaining
of the timeout value rather than leaving it the original value.

Quite a few bugs removed, including incorrect arguments being used in
syscalls..  eg:  mixups between passing the sigset as an int, vs passing
it as a pointer and doing a copyin(), missing return values, unhandled
cases, SIOC* ioctls, etc.

The build for the code has changed.  i386/conf/files now knows how
to build linux_genassym and generate linux_assym.h on the fly.

Supporting changes elsewhere in the kernel:

The user-mode signal trampoline has moved from the U area to immediately
below the top of the stack (below PS_STRINGS).  This allows the different
binary emulations to have their own signal trampoline code (which gets rid
of the hardwired syscall 103 (sigreturn on BSD, syslog on Linux)) and so
that the emulator can provide the exact "struct sigcontext *" argument to
the program's signal handlers.

The sigstack's "ss_flags" now uses SS_DISABLE and SS_ONSTACK flags, which
have the same values as the re-used SA_DISABLE and SA_ONSTACK which are
intended for sigaction only.  This enables the support of a SA_RESETHAND
flag to sigaction to implement the gross SYSV and Linux SA_ONESHOT signal
semantics where the signal handler is reset when it's triggered.

makesyscalls.sh no longer appends the struct sysentvec on the end of the
generated init_sysent.c code.  It's a lot saner to have it in a seperate
file rather than trying to update the structure inside the awk script. :-)

At exec time, the dozen bytes or so of signal trampoline code are copied
to the top of the user's stack, rather than obtaining the trampoline code
the old way by getting a clone of the parent's user area.  This allows
Linux and native binaries to freely exec each other without getting
trampolines mixed up.
1996-03-02 19:38:20 +00:00

359 lines
11 KiB
C

/*-
* Copyright (c) 1994-1995 Søren Schmidt
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software withough specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: linux_sysent.c,v 1.4 1996/01/14 10:59:57 sos Exp $
*/
/* XXX we use functions that might not exist. */
#define COMPAT_43 1
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/sysent.h>
#include <sys/imgact.h>
#include <sys/signalvar.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <sys/user.h>
#include <sys/exec.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/reg.h>
#include <machine/specialreg.h>
#include <machine/psl.h>
#include <machine/sysarch.h>
#include <machine/md_var.h>
#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <i386/linux/linux_syscall.h>
/*
* Linux syscalls return negative errno's, we do positive and map them
*/
int bsd_to_linux_errno[ELAST] = {
-0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
-116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
-6,
};
int bsd_to_linux_signal[NSIG] = {
0, LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT,
LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0,
LINUX_SIGFPE, LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV,
0, LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM,
LINUX_SIGURG, LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT,
LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO,
LINUX_SIGXCPU, LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF,
LINUX_SIGWINCH, 0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
int linux_to_bsd_signal[LINUX_NSIG] = {
0, SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGTRAP, SIGABRT, SIGEMT,
SIGFPE, SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, SIGPIPE, SIGALRM, SIGTERM,
SIGBUS, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGIO,
SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, SIGURG, SIGURG, 0
};
int linux_fixup(int **stack_base, struct image_params *imgp)
{
int *argv, *envp;
argv = *stack_base;
envp = *stack_base + (imgp->argc + 1);
(*stack_base)--;
**stack_base = (int)envp;
(*stack_base)--;
**stack_base = (int)argv;
(*stack_base)--;
**stack_base = (int)imgp->argc;
return 0; /* XXX */
}
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
static void linux_sendsig(sig_t action,
int sig,
int returnmask,
unsigned code);
extern int _ucodesel, _udatasel;
/*
* Send an interrupt to process.
*
* Stack is set up to allow sigcode stored
* in u. to call routine, followed by kcall
* to sigreturn routine below. After sigreturn
* resets the signal mask, the stack, and the
* frame pointer, it returns to the user
* specified pc, psl.
*/
static void
linux_sendsig(catcher, sig, mask, code)
sig_t catcher;
int sig;
int mask;
unsigned code;
{
register struct proc *p = curproc;
register int *regs;
struct linux_sigframe *fp, frame;
struct sigacts *psp = p->p_sigacts;
int oonstack;
regs = p->p_md.md_regs;
oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
#ifdef DEBUG
printf("Linux-emul(%d): linux_sendsig(%8x, %d, %d, %d)\n",
p->p_pid, catcher, sig, mask, code);
#endif
/*
* Allocate space for the signal handler context.
*/
if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
(psp->ps_sigonstack & sigmask(sig))) {
fp = (struct linux_sigframe *)(psp->ps_sigstk.ss_sp +
psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe));
psp->ps_sigstk.ss_flags |= SS_ONSTACK;
} else {
fp = (struct linux_sigframe *)regs[tESP] - 1;
}
/*
* grow() will return FALSE if the fp will not fit inside the stack
* and the stack can not be grown. useracc will return FALSE
* if access is denied.
*/
if ((grow(p, (int)fp) == FALSE) ||
(useracc((caddr_t)fp, sizeof (struct linux_sigframe), B_WRITE) == FALSE)) {
/*
* Process has trashed its stack; give it an illegal
* instruction to halt it in its tracks.
*/
SIGACTION(p, SIGILL) = SIG_DFL;
sig = sigmask(SIGILL);
p->p_sigignore &= ~sig;
p->p_sigcatch &= ~sig;
p->p_sigmask &= ~sig;
psignal(p, SIGILL);
return;
}
/*
* Build the argument list for the signal handler.
*/
if (p->p_sysent->sv_sigtbl) {
if (sig < p->p_sysent->sv_sigsize)
sig = p->p_sysent->sv_sigtbl[sig];
else
sig = p->p_sysent->sv_sigsize + 1;
}
frame.sf_handler = catcher;
frame.sf_sig = sig;
/*
* Build the signal context to be used by sigreturn.
*/
frame.sf_sc.sc_mask = mask;
__asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs));
__asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs));
frame.sf_sc.sc_es = regs[tES];
frame.sf_sc.sc_ds = regs[tDS];
frame.sf_sc.sc_edi = regs[tEDI];
frame.sf_sc.sc_esi = regs[tESI];
frame.sf_sc.sc_ebp = regs[tEBP];
frame.sf_sc.sc_ebx = regs[tEBX];
frame.sf_sc.sc_edx = regs[tEDX];
frame.sf_sc.sc_ecx = regs[tECX];
frame.sf_sc.sc_eax = regs[tEAX];
frame.sf_sc.sc_eip = regs[tEIP];
frame.sf_sc.sc_cs = regs[tCS];
frame.sf_sc.sc_eflags = regs[tEFLAGS];
frame.sf_sc.sc_esp_at_signal = regs[tESP];
frame.sf_sc.sc_ss = regs[tSS];
frame.sf_sc.sc_err = regs[tERR];
frame.sf_sc.sc_trapno = code; /* XXX ???? */
if (copyout(&frame, fp, sizeof(frame)) != 0) {
/*
* Process has trashed its stack; give it an illegal
* instruction to halt it in its tracks.
*/
sigexit(p, SIGILL);
/* NOTREACHED */
}
/*
* Build context to run handler in.
*/
regs[tESP] = (int)fp;
regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
regs[tEFLAGS] &= ~PSL_VM;
regs[tCS] = _ucodesel;
regs[tDS] = _udatasel;
regs[tES] = _udatasel;
regs[tSS] = _udatasel;
}
/*
* System call to cleanup state after a signal
* has been taken. Reset signal mask and
* stack state from context left by sendsig (above).
* Return to previous pc and psl as specified by
* context left by sendsig. Check carefully to
* make sure that the user has not modified the
* psl to gain improper privileges or to cause
* a machine fault.
*/
int
linux_sigreturn(p, args, retval)
struct proc *p;
struct linux_sigreturn_args *args;
int *retval;
{
struct linux_sigcontext *scp, context;
register int *regs;
int eflags;
regs = p->p_md.md_regs;
#ifdef DEBUG
printf("Linux-emul(%d): linux_sigreturn(%8x)\n", p->p_pid, args->scp);
#endif
/*
* The trampoline code hands us the context.
* It is unsafe to keep track of it ourselves, in the event that a
* program jumps out of a signal handler.
*/
scp = args->scp;
if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
return (EFAULT);
/*
* Check for security violations.
*/
#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
eflags = context.sc_eflags;
/*
* XXX do allow users to change the privileged flag PSL_RF. The
* cpu sets PSL_RF in tf_eflags for faults. Debuggers should
* sometimes set it there too. tf_eflags is kept in the signal
* context during signal handling and there is no other place
* to remember it, so the PSL_RF bit may be corrupted by the
* signal handler without us knowing. Corruption of the PSL_RF
* bit at worst causes one more or one less debugger trap, so
* allowing it is fairly harmless.
*/
if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
return(EINVAL);
}
/*
* Don't allow users to load a valid privileged %cs. Let the
* hardware check for invalid selectors, excess privilege in
* other selectors, invalid %eip's and invalid %esp's.
*/
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
if (!CS_SECURE(context.sc_cs)) {
trapsignal(p, SIGBUS, T_PROTFLT);
return(EINVAL);
}
p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
p->p_sigmask = context.sc_mask &~
(sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
/*
* Restore signal context.
*/
/* %fs and %gs were restored by the trampoline. */
regs[tES] = context.sc_es;
regs[tDS] = context.sc_ds;
regs[tEDI] = context.sc_edi;
regs[tESI] = context.sc_esi;
regs[tEBP] = context.sc_ebp;
regs[tEBX] = context.sc_ebx;
regs[tEDX] = context.sc_edx;
regs[tECX] = context.sc_ecx;
regs[tEAX] = context.sc_eax;
regs[tEIP] = context.sc_eip;
regs[tCS] = context.sc_cs;
regs[tEFLAGS] = eflags;
regs[tESP] = context.sc_esp_at_signal;
regs[tSS] = context.sc_ss;
return (EJUSTRETURN);
}
static void
linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
{
int i;
args[0] = tf->tf_ebx;
args[1] = tf->tf_ecx;
args[2] = tf->tf_edx;
args[3] = tf->tf_esi;
args[4] = tf->tf_edi;
*params = NULL; /* no copyin */
}
extern char linux_sigcode[];
extern int linux_szsigcode;
struct sysentvec linux_sysvec = {
sizeof (linux_sysent) / sizeof(linux_sysent[0]),
linux_sysent,
0xff,
NSIG,
bsd_to_linux_signal,
ELAST,
bsd_to_linux_errno,
linux_fixup,
linux_sendsig,
linux_sigcode,
&linux_szsigcode,
linux_prepsyscall,
};