MFP4: Linux set_thread_area syscall (aka TLS) support for amd64.

Initial version was submitted by Divacky Roman and mostly rewritten by me.

Tested by:	emulation
This commit is contained in:
Jung-uk Kim 2007-03-30 00:06:21 +00:00
parent 78f66a0f21
commit 9c5b213e51
9 changed files with 196 additions and 40 deletions

View File

@ -104,11 +104,12 @@ ENTRY(cpu_switch)
testl $PCB_32BIT,PCB_FLAGS(%r8)
jz 1f /* no, skip over */
/* Save segment selector numbers */
movl %ds,PCB_DS(%r8)
movl %es,PCB_ES(%r8)
movl %fs,PCB_FS(%r8)
/* Save userland %gs */
movl %gs,PCB_GS(%r8)
movq PCB_GS32P(%r8),%rax
movq (%rax),%rax
movq %rax,PCB_GS32SD(%r8)
1:
/* Test if debug registers should be saved. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
@ -170,22 +171,6 @@ sw1:
*/
movq TD_PCB(%rsi),%r8
testl $PCB_32BIT,PCB_FLAGS(%r8)
jz 1f /* no, skip over */
/* Restore segment selector numbers */
movl PCB_DS(%r8),%ds
movl PCB_ES(%r8),%es
movl PCB_FS(%r8),%fs
/* Restore userland %gs while preserving kernel gsbase */
movl $MSR_GSBASE,%ecx
rdmsr
movl PCB_GS(%r8),%gs
wrmsr
jmp 2f
1:
/* Restore userland %fs */
movl $MSR_FSBASE,%ecx
movl PCB_FSBASE(%r8),%eax
@ -197,7 +182,6 @@ sw1:
movl PCB_GSBASE(%r8),%eax
movl PCB_GSBASE+4(%r8),%edx
wrmsr
2:
/* Update the TSS_RSP0 pointer for the next interrupt */
movq PCPU(TSSP), %rax
@ -211,6 +195,19 @@ sw1:
movl %eax, PCPU(CURTID)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
testl $PCB_32BIT,PCB_FLAGS(%r8)
jz 1f /* no, skip over */
/* Restore userland %gs while preserving kernel gsbase */
movq PCB_GS32P(%r8),%rax
movq PCB_GS32SD(%r8),%rbx
movq %rbx,(%rax)
movl $MSR_GSBASE,%ecx
rdmsr
movl PCB_GS(%r8),%gs
wrmsr
1:
/* Restore context. */
movq PCB_RBX(%r8),%rbx
movq PCB_RSP(%r8),%rsp

View File

@ -136,12 +136,14 @@ ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
ASSYM(PCB_FULLCTX, PCB_FULLCTX);
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_FULLCTX, PCB_FULLCTX);
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_GS32P, offsetof(struct pcb, pcb_gs32p));
ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
ASSYM(PCB_SIZE, sizeof(struct pcb));

View File

@ -725,6 +725,15 @@ struct soft_segment_descriptor gdt_segs[] = {
0, /* long */
0, /* default 32 vs 16 bit size */
0 /* limit granularity (byte/page units)*/ },
/* GUGS32_SEL 8 32 bit GS Descriptor for user */
{ 0x0, /* segment base address */
0xfffff, /* length - all address space */
SDT_MEMRWA, /* segment type */
SEL_UPL, /* segment descriptor priority level */
1, /* segment descriptor present */
0, /* long */
1, /* default 32 vs 16 bit size */
1 /* limit granularity (byte/page units)*/ },
};
void

View File

@ -41,6 +41,7 @@
* AMD64 process control block
*/
#include <machine/fpu.h>
#include <machine/segments.h>
struct pcb {
register_t pcb_cr3;
@ -73,6 +74,10 @@ struct pcb {
#define PCB_FULLCTX 0x80 /* full context restore on sysret */
caddr_t pcb_onfault; /* copyin/out fault recovery */
/* 32-bit segment descriptor */
struct user_segment_descriptor *pcb_gs32p;
struct user_segment_descriptor pcb_gs32sd;
};
#ifdef _KERNEL

View File

@ -200,9 +200,10 @@ struct region_descriptor {
#define GUCODE32_SEL 3 /* User 32 bit code Descriptor */
#define GUDATA_SEL 4 /* User 32/64 bit Data Descriptor */
#define GUCODE_SEL 5 /* User 64 bit Code Descriptor */
#define GPROC0_SEL 6 /* TSS for entering kernel etc */
#define GPROC0_SEL 6 /* TSS for entering kernel etc */
/* slot 7 is second half of GPROC0_SEL (a 64-bit TSS descriptor spans two slots) */
#define NGDT 8
#define GUGS32_SEL 8 /* User 32 bit GS Descriptor */
#define NGDT 9
#ifdef _KERNEL
extern struct user_segment_descriptor gdt[];

View File

@ -11,8 +11,6 @@
NON_GPROF_ENTRY(linux_sigcode)
call *LINUX_SIGF_HANDLER(%esp)
leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */
movl LINUX_SC_GS(%ebx),%gs
movl LINUX_SC_FS(%ebx),%fs
movl LINUX_SC_ES(%ebx),%es
movl LINUX_SC_DS(%ebx),%ds
movl %esp, %ebx /* pass sigframe */
@ -25,8 +23,6 @@ NON_GPROF_ENTRY(linux_sigcode)
linux_rt_sigcode:
call *LINUX_RT_SIGF_HANDLER(%esp)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
movl LINUX_SC_GS(%ebx),%gs
movl LINUX_SC_FS(%ebx),%fs
movl LINUX_SC_ES(%ebx),%es
movl LINUX_SC_DS(%ebx),%ds
push %eax /* fake ret addr */

View File

@ -53,7 +53,10 @@ __FBSDID("$FreeBSD$");
#include <sys/unistd.h>
#include <machine/frame.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@ -656,7 +659,43 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
td2->td_frame->tf_rsp = PTROUT(args->stack);
if (args->flags & LINUX_CLONE_SETTLS) {
/* XXX: todo */
struct user_segment_descriptor sd;
struct l_user_desc info;
int a[2];
error = copyin((void *)td->td_frame->tf_rsi, &info,
sizeof(struct l_user_desc));
if (error) {
printf(LMSG("copyin failed!"));
} else {
/* We might copy out the entry_number as GUGS32_SEL. */
info.entry_number = GUGS32_SEL;
error = copyout(&info, (void *)td->td_frame->tf_rsi,
sizeof(struct l_user_desc));
if (error)
printf(LMSG("copyout failed!"));
a[0] = LINUX_LDT_entry_a(&info);
a[1] = LINUX_LDT_entry_b(&info);
memcpy(&sd, &a, sizeof(a));
#ifdef DEBUG
if (ldebug(clone))
printf("Segment created in clone with "
"CLONE_SETTLS: lobase: %x, hibase: %x, "
"lolimit: %x, hilimit: %x, type: %i, "
"dpl: %i, p: %i, xx: %i, long: %i, "
"def32: %i, gran: %i\n", sd.sd_lobase,
sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
sd.sd_long, sd.sd_def32, sd.sd_gran);
#endif
td2->td_pcb->pcb_gsbase = (register_t)info.base_addr;
td2->td_pcb->pcb_gs32sd = sd;
td2->td_pcb->pcb_gs32p = &gdt[GUGS32_SEL];
td2->td_pcb->pcb_gs = GSEL(GUGS32_SEL, SEL_UPL);
td2->td_pcb->pcb_flags |= PCB_32BIT;
}
}
#ifdef DEBUG
@ -904,6 +943,19 @@ linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
return (error);
}
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
struct mprotect_args bsd_args;
bsd_args.addr = uap->addr;
bsd_args.len = uap->len;
bsd_args.prot = uap->prot;
if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
bsd_args.prot |= PROT_READ | PROT_EXEC;
return (mprotect(td, &bsd_args));
}
int
linux_iopl(struct thread *td, struct linux_iopl_args *args)
{
@ -1177,14 +1229,104 @@ linux_sched_rr_get_interval(struct thread *td,
}
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
linux_set_thread_area(struct thread *td,
struct linux_set_thread_area_args *args)
{
struct mprotect_args bsd_args;
struct l_user_desc info;
struct user_segment_descriptor sd;
int a[2];
int error;
bsd_args.addr = uap->addr;
bsd_args.len = uap->len;
bsd_args.prot = uap->prot;
if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
bsd_args.prot |= PROT_READ | PROT_EXEC;
return (mprotect(td, &bsd_args));
error = copyin(args->desc, &info, sizeof(struct l_user_desc));
if (error)
return (error);
#ifdef DEBUG
if (ldebug(set_thread_area))
printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
"%i, %i, %i"), info.entry_number, info.base_addr,
info.limit, info.seg_32bit, info.contents,
info.read_exec_only, info.limit_in_pages,
info.seg_not_present, info.useable);
#endif
/*
* Semantics of the Linux version: every thread in the system has an array
* of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
* This syscall loads one of the selected TLS descriptors with a value
* and also loads GDT descriptors 6, 7 and 8 with the content of
* the per-thread descriptors.
*
* Semantics of the FreeBSD version: I think we can ignore that Linux has
* three per-thread descriptors and use just the first one.
* The tls_array[] is used only in [gs]et_thread_area() syscalls and
* for loading the GDT descriptors. We use just one GDT descriptor
* for TLS, so we will load just one.
* XXX: This doesn't work when a user-space process tries to use more
* than one TLS segment. A comment in the Linux source says wine might
* do that.
*/
/*
* GLIBC reads the current %gs and calls set_thread_area() with it.
* We should let GUDATA_SEL and GUGS32_SEL proceed as well because
* we use these segments.
*/
switch (info.entry_number) {
case GUGS32_SEL:
case GUDATA_SEL:
case 6:
case -1:
info.entry_number = GUGS32_SEL;
break;
default:
return (EINVAL);
}
/*
* We have to copy out the GDT entry we use.
* XXX: What if userspace program does not check return value and
* tries to use 6, 7 or 8?
*/
error = copyout(&info, args->desc, sizeof(struct l_user_desc));
if (error)
return (error);
if (LINUX_LDT_empty(&info)) {
a[0] = 0;
a[1] = 0;
} else {
a[0] = LINUX_LDT_entry_a(&info);
a[1] = LINUX_LDT_entry_b(&info);
}
memcpy(&sd, &a, sizeof(a));
#ifdef DEBUG
if (ldebug(set_thread_area))
printf("Segment created in set_thread_area: "
"lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
"type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
"def32: %i, gran: %i\n",
sd.sd_lobase,
sd.sd_hibase,
sd.sd_lolimit,
sd.sd_hilimit,
sd.sd_type,
sd.sd_dpl,
sd.sd_p,
sd.sd_xx,
sd.sd_long,
sd.sd_def32,
sd.sd_gran);
#endif
critical_enter();
td->td_pcb->pcb_gsbase = (register_t)info.base_addr;
td->td_pcb->pcb_gs32sd = gdt[GUGS32_SEL] = sd;
td->td_pcb->pcb_gs32p = &gdt[GUGS32_SEL];
td->td_pcb->pcb_flags |= PCB_32BIT;
wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
critical_exit();
return (0);
}

View File

@ -408,6 +408,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -528,6 +529,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -813,18 +815,20 @@ exec_linux_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
load_gs(0);
load_gs(_udatasel);
pcb->pcb_ds = _udatasel;
pcb->pcb_es = _udatasel;
pcb->pcb_fs = _udatasel;
pcb->pcb_gs = 0;
pcb->pcb_gs = _udatasel;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;

View File

@ -409,7 +409,7 @@
struct l_timespec *timeout, void *uaddr2, int val3); }
241 AUE_NULL UNIMPL linux_sched_setaffinity
242 AUE_NULL UNIMPL linux_sched_getaffinity
243 AUE_NULL UNIMPL linux_set_thread_area
243 AUE_NULL STD { int linux_set_thread_area(struct l_user_desc *desc); }
244 AUE_NULL UNIMPL linux_get_thread_area
245 AUE_NULL UNIMPL linux_io_setup
246 AUE_NULL UNIMPL linux_io_destroy