freebsd-skq/sys/ia64/ia32/ia32_sysvec.c
Marcel Moolenaar f2c49dd248 Revamp of the syscall path, exception and context handling. The
prime objectives are:
o  Implement a syscall path based on the epc inststruction (see
   sys/ia64/ia64/syscall.s).
o  Revisit the places were we need to save and restore registers
   and define those contexts in terms of the register sets (see
   sys/ia64/include/_regset.h).

Secundairy objectives:
o  Remove the requirement to use contigmalloc for kernel stacks.
o  Better handling of the high FP registers for SMP systems.
o  Switch to the new cpu_switch() and cpu_throw() semantics.
o  Add a good unwinder to reconstruct contexts for the rare
   cases we need to (see sys/contrib/ia64/libuwx)

Many files are affected by this change. Functionally it boils
down to:
o  The EPC syscall doesn't preserve registers it does not need
   to preserve and places the arguments differently on the stack.
   This affects libc and truss.
o  The address of the kernel page directory (kptdir) had to
   be unstaticized for use by the nested TLB fault handler.
   The name has been changed to ia64_kptdir to avoid conflicts.
   The renaming affects libkvm.
o  The trapframe only contains the special registers and the
   scratch registers. For syscalls using the EPC syscall path
   no scratch registers are saved. This affects all places where
   the trapframe is accessed. Most notably the unaligned access
   handler, the signal delivery code and the debugger.
o  Context switching only partly saves the special registers
   and the preserved registers. This affects cpu_switch() and
   triggered the move to the new semantics, which additionally
   affects cpu_throw().
o  The high FP registers are either in the PCB or on some
   CPU. context switching for them is done lazily. This affects
   trap().
o  The mcontext has room for all registers, but not all of them
   have to be defined in all cases. This mostly affects signal
   delivery code now. The *context syscalls are as of yet still
   unimplemented.

Many details went into the removal of the requirement to use
contigmalloc for kernel stacks. The details are mostly CPU
specific and limited to exception_save() and exception_restore().
The few places where we create, destroy or switch stacks were
mostly simplified by not having to construct physical addresses
and additionally saving the virtual addresses for later use.

Besides more efficient context saving and restoring, which of
course yields a noticable speedup, this also fixes the dreaded
SMP bootup problem as a side-effect. The details of which are
still not fully understood.

This change includes all the necessary backward compatibility
code to have it handle older userland binaries that use the
break instruction for syscalls. Support for break-based syscalls
has been pessimized in favor of a clean implementation. Due to
the overall better performance of the kernel, this will still
be notived as an improvement if it's noticed at all.

Approved by: re@ (jhb)
2003-05-16 21:26:42 +00:00

385 lines
10 KiB
C

/*-
* Copyright (c) 2002 Doug Rabson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define __ELF_WORD_SIZE 32
#include <sys/param.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/imgact_elf.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <ia64/ia32/ia32_util.h>
#include <i386/include/psl.h>
#include <i386/include/segments.h>
#include <i386/include/specialreg.h>
static register_t *ia32_copyout_strings(struct image_params *imgp);
static void ia32_setregs(struct thread *td, u_long entry, u_long stack,
u_long ps_strings);
extern struct sysent ia32_sysent[];
static char ia32_sigcode[] = {
0xff, 0x54, 0x24, 0x10, /* call *SIGF_HANDLER(%esp) */
0x8d, 0x44, 0x24, 0x14, /* lea SIGF_UC(%esp),%eax */
0x50, /* pushl %eax */
0xf7, 0x40, 0x54, 0x00, 0x00, 0x02, 0x02, /* testl $PSL_VM,UC_EFLAGS(%eax) */
0x75, 0x03, /* jne 9f */
0x8e, 0x68, 0x14, /* movl UC_GS(%eax),%gs */
0xb8, 0x57, 0x01, 0x00, 0x00, /* 9: movl $SYS_sigreturn,%eax */
0x50, /* pushl %eax */
0xcd, 0x80, /* int $0x80 */
0xeb, 0xfe, /* 0: jmp 0b */
0
};
static int ia32_szsigcode = sizeof(ia32_sigcode);
struct sysentvec ia32_freebsd_sysvec = {
SYS_MAXSYSCALL,
ia32_sysent,
0,
0,
NULL,
0,
NULL,
NULL,
elf32_freebsd_fixup,
sendsig,
ia32_sigcode,
&ia32_szsigcode,
NULL,
"FreeBSD ELF",
elf32_coredump,
NULL,
IA32_MINSIGSTKSZ,
IA32_PAGE_SIZE,
0,
IA32_USRSTACK,
IA32_USRSTACK,
IA32_PS_STRINGS,
VM_PROT_ALL,
ia32_copyout_strings,
ia32_setregs
};
static Elf32_Brandinfo ia32_brand_info = {
ELFOSABI_FREEBSD,
EM_386,
"FreeBSD",
"/compat/ia32",
"/usr/libexec/ld-elf.so.1",
&ia32_freebsd_sysvec
};
SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t) elf32_insert_brand_entry,
&ia32_brand_info);
static register_t *
ia32_copyout_strings(struct image_params *imgp)
{
int argc, envc;
u_int32_t *vectp;
char *stringp, *destp;
u_int32_t *stack_base;
struct ia32_ps_strings *arginfo;
int szsigcode;
/*
* Calculate string base and vector table pointers.
* Also deal with signal trampoline code for this exec type.
*/
arginfo = (struct ia32_ps_strings *)IA32_PS_STRINGS;
szsigcode = *(imgp->proc->p_sysent->sv_szsigcode);
destp = (caddr_t)arginfo - szsigcode - IA32_USRSPACE -
roundup((ARG_MAX - imgp->stringspace), sizeof(char *));
/*
* install sigcode
*/
if (szsigcode)
copyout(imgp->proc->p_sysent->sv_sigcode,
((caddr_t)arginfo - szsigcode), szsigcode);
/*
* If we have a valid auxargs ptr, prepare some room
* on the stack.
*/
if (imgp->auxargs) {
/*
* 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
* lower compatibility.
*/
imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
: (AT_COUNT * 2);
/*
* The '+ 2' is for the null pointers at the end of each of
* the arg and env vector sets,and imgp->auxarg_size is room
* for argument of Runtime loader.
*/
vectp = (u_int32_t *) (destp - (imgp->argc + imgp->envc + 2 +
imgp->auxarg_size) * sizeof(u_int32_t));
} else
/*
* The '+ 2' is for the null pointers at the end of each of
* the arg and env vector sets
*/
vectp = (u_int32_t *)
(destp - (imgp->argc + imgp->envc + 2) * sizeof(u_int32_t));
/*
* vectp also becomes our initial stack base
*/
vectp = (void*)((uintptr_t)vectp & ~15);
stack_base = vectp;
stringp = imgp->stringbase;
argc = imgp->argc;
envc = imgp->envc;
/*
* Copy out strings - arguments and environment.
*/
copyout(stringp, destp, ARG_MAX - imgp->stringspace);
/*
* Fill in "ps_strings" struct for ps, w, etc.
*/
suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
suword32(&arginfo->ps_nargvstr, argc);
/*
* Fill in argument portion of vector table.
*/
for (; argc > 0; --argc) {
suword32(vectp++, (u_int32_t)(intptr_t)destp);
while (*stringp++ != 0)
destp++;
destp++;
}
/* a null vector table pointer separates the argp's from the envp's */
suword32(vectp++, 0);
suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
suword32(&arginfo->ps_nenvstr, envc);
/*
* Fill in environment portion of vector table.
*/
for (; envc > 0; --envc) {
suword32(vectp++, (u_int32_t)(intptr_t)destp);
while (*stringp++ != 0)
destp++;
destp++;
}
/* end of vector table is a null pointer */
suword32(vectp, 0);
return ((register_t *)stack_base);
}
static void
ia32_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings)
{
struct trapframe *tf = td->td_frame;
vm_offset_t gdt, ldt;
u_int64_t codesel, datasel, ldtsel;
u_int64_t codeseg, dataseg, gdtseg, ldtseg;
struct segment_descriptor desc;
struct vmspace *vmspace = td->td_proc->p_vmspace;
exec_setregs(td, entry, stack, ps_strings);
/* Non-syscall frames are cleared by exec_setregs() */
if (tf->tf_flags & FRAME_SYSCALL) {
bzero(&tf->tf_scratch, sizeof(tf->tf_scratch));
bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp));
} else
tf->tf_special.ndirty = 0;
tf->tf_special.psr |= IA64_PSR_IS;
tf->tf_special.sp = stack;
/* Point the RSE backstore to something harmless. */
tf->tf_special.bspstore = (IA32_PS_STRINGS - ia32_szsigcode -
IA32_USRSPACE + 15) & ~15;
codesel = LSEL(LUCODE_SEL, SEL_UPL);
datasel = LSEL(LUDATA_SEL, SEL_UPL);
ldtsel = GSEL(GLDT_SEL, SEL_UPL);
/* Setup ia32 segment registers. */
tf->tf_scratch.gr16 = (datasel << 48) | (datasel << 32) |
(datasel << 16) | datasel;
tf->tf_scratch.gr17 = (ldtsel << 32) | (datasel << 16) | codesel;
/*
* Build the GDT and LDT.
*/
gdt = IA32_USRSTACK;
vm_map_find(&vmspace->vm_map, 0, 0, &gdt, IA32_PAGE_SIZE << 1, 0,
VM_PROT_ALL, VM_PROT_ALL, 0);
ldt = gdt + IA32_PAGE_SIZE;
desc.sd_lolimit = 8*NLDT-1;
desc.sd_lobase = ldt & 0xffffff;
desc.sd_type = SDT_SYSLDT;
desc.sd_dpl = SEL_UPL;
desc.sd_p = 1;
desc.sd_hilimit = 0;
desc.sd_def32 = 0;
desc.sd_gran = 0;
desc.sd_hibase = ldt >> 24;
copyout(&desc, (caddr_t) gdt + 8*GLDT_SEL, sizeof(desc));
desc.sd_lolimit = ((IA32_USRSTACK >> 12) - 1) & 0xffff;
desc.sd_lobase = 0;
desc.sd_type = SDT_MEMERA;
desc.sd_dpl = SEL_UPL;
desc.sd_p = 1;
desc.sd_hilimit = ((IA32_USRSTACK >> 12) - 1) >> 16;
desc.sd_def32 = 1;
desc.sd_gran = 1;
desc.sd_hibase = 0;
copyout(&desc, (caddr_t) ldt + 8*LUCODE_SEL, sizeof(desc));
desc.sd_type = SDT_MEMRWA;
copyout(&desc, (caddr_t) ldt + 8*LUDATA_SEL, sizeof(desc));
codeseg = 0 /* base */
+ (((IA32_USRSTACK >> 12) - 1) << 32) /* limit */
+ ((long)SDT_MEMERA << 52)
+ ((long)SEL_UPL << 57)
+ (1L << 59) /* present */
+ (1L << 62) /* 32 bits */
+ (1L << 63); /* page granularity */
dataseg = 0 /* base */
+ (((IA32_USRSTACK >> 12) - 1) << 32) /* limit */
+ ((long)SDT_MEMRWA << 52)
+ ((long)SEL_UPL << 57)
+ (1L << 59) /* present */
+ (1L << 62) /* 32 bits */
+ (1L << 63); /* page granularity */
tf->tf_scratch.csd = codeseg;
tf->tf_scratch.ssd = dataseg;
tf->tf_scratch.gr24 = dataseg; /* ESD */
tf->tf_scratch.gr27 = dataseg; /* DSD */
tf->tf_scratch.gr28 = dataseg; /* FSD */
tf->tf_scratch.gr29 = dataseg; /* GSD */
gdtseg = gdt /* base */
+ ((8L*NGDT - 1) << 32) /* limit */
+ ((long)SDT_SYSNULL << 52)
+ ((long)SEL_UPL << 57)
+ (1L << 59) /* present */
+ (0L << 62) /* 16 bits */
+ (0L << 63); /* byte granularity */
ldtseg = ldt /* base */
+ ((8L*NLDT - 1) << 32) /* limit */
+ ((long)SDT_SYSLDT << 52)
+ ((long)SEL_UPL << 57)
+ (1L << 59) /* present */
+ (0L << 62) /* 16 bits */
+ (0L << 63); /* byte granularity */
tf->tf_scratch.gr30 = ldtseg; /* LDTD */
tf->tf_scratch.gr31 = gdtseg; /* GDTD */
/* Set ia32 control registers on this processor. */
ia64_set_cflg(CR0_PE | CR0_PG | ((long)(CR4_XMM | CR4_FXSR) << 32));
ia64_set_eflag(PSL_USER);
/* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */
tf->tf_scratch.gr11 = IA32_PS_STRINGS;
/*
* XXX - Linux emulator
* Make sure sure edx is 0x0 on entry. Linux binaries depend
* on it.
*/
td->td_retval[1] = 0;
}
void
ia32_restorectx(struct pcb *pcb)
{
ia64_set_cflg(pcb->pcb_ia32_cflg);
ia64_set_eflag(pcb->pcb_ia32_eflag);
ia64_set_fcr(pcb->pcb_ia32_fcr);
ia64_set_fdr(pcb->pcb_ia32_fdr);
ia64_set_fir(pcb->pcb_ia32_fir);
ia64_set_fsr(pcb->pcb_ia32_fsr);
}
void
ia32_savectx(struct pcb *pcb)
{
pcb->pcb_ia32_cflg = ia64_get_cflg();
pcb->pcb_ia32_eflag = ia64_get_eflag();
pcb->pcb_ia32_fcr = ia64_get_fcr();
pcb->pcb_ia32_fdr = ia64_get_fdr();
pcb->pcb_ia32_fir = ia64_get_fir();
pcb->pcb_ia32_fsr = ia64_get_fsr();
}