Restore the segment registers and segment base MSRs for amd64 syscall

return path only when neither thread was context switched while
executing syscall code nor syscall explicitely modified LDT or MSRs.

Save segment registers in trap handlers before interrupts are enabled,
to not allow context switches to happen before registers are saved.
Use separated byte in pcb for indication of fast/full return, since
pcb_flags are not synchronized with context switches.

The change puts back syscall microbenchmark numbers that were slowed
down after commit of the support for LDT on amd64.

Reviewed by:	jeff
Tested (and tested, and tested ...) by:	pho
Approved by:	re (kensmith)
This commit is contained in:
kib 2009-07-09 09:34:11 +00:00
parent 2706e21f2c
commit 1596f53aae
11 changed files with 60 additions and 15 deletions

View File

@ -97,6 +97,7 @@ END(cpu_throw)
ENTRY(cpu_switch)
/* Switch to new thread. First, save context. */
movq TD_PCB(%rdi),%r8
movb $1,PCB_FULL_IRET(%r8)
movq (%rsp),%rax /* Hardware registers */
movq %r15,PCB_R15(%r8)

View File

@ -162,19 +162,20 @@ IDTVEC(align)
.globl alltraps
.type alltraps,@function
alltraps:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_testi /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
movb $0,PCB_FULL_IRET(%rdi)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
alltraps_testi:
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs
jz alltraps_pushregs_no_rdi
sti
alltraps_pushregs:
movq %rdi,TF_RDI(%rsp)
alltraps_pushregs_no_rdi:
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
@ -233,14 +234,17 @@ calltrap:
.globl alltraps_noen
.type alltraps_noen,@function
alltraps_noen:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
movb $0,PCB_FULL_IRET(%rdi)
1: movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
jmp alltraps_pushregs
jmp alltraps_pushregs_no_rdi
IDTVEC(dblfault)
subq $TF_ERR,%rsp
@ -278,12 +282,13 @@ IDTVEC(dblfault)
IDTVEC(page)
subq $TF_ERR,%rsp
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
1:
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %cr2,%rdi /* preserve %cr2 before .. */
movq PCPU(CURPCB),%rdi
movb $0,PCB_FULL_IRET(%rdi)
1: movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
@ -311,7 +316,9 @@ IDTVEC(prot)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 2f /* already running with kernel GS.base */
1: swapgs
2: movw %fs,TF_FS(%rsp)
2: movq PCPU(CURPCB),%rdi
movb $1,PCB_FULL_IRET(%rdi) /* always full iret from GPF */
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
@ -341,6 +348,8 @@ IDTVEC(fast_syscall)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movq PCPU(CURPCB),%r11
movb $0,PCB_FULL_IRET(%r11)
sti
movq $KUDSEL,TF_SS(%rsp)
movq $KUCSEL,TF_CS(%rsp)
@ -644,7 +653,8 @@ doreti_exit:
*/
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz ld_regs
cmpb $0,PCB_FULL_IRET(%r8)
je ld_regs
testl $TF_HASSEGS,TF_FLAGS(%rsp)
je set_segs

View File

@ -141,6 +141,7 @@ ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
ASSYM(PCB_FULL_IRET, offsetof(struct pcb, pcb_full_iret));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
ASSYM(PCB_GS32BIT, PCB_GS32BIT);

View File

@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -483,6 +484,7 @@ sigreturn(td, uap)
signotify(td);
PROC_UNLOCK(p);
td->td_pcb->pcb_flags |= PCB_FULLCTX;
td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@ -853,6 +855,7 @@ exec_setregs(td, entry, stack, ps_strings)
pcb->pcb_gsbase = 0;
pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
pcb->pcb_full_iret = 1;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
@ -2031,6 +2034,7 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
}
td->td_pcb->pcb_flags |= PCB_FULLCTX;
td->td_pcb->pcb_full_iret = 1;
return (0);
}

View File

@ -103,6 +103,7 @@ sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
error = amd64_get_ldt(td, largs);
break;
case I386_SET_LDT:
td->td_pcb->pcb_full_iret = 1;
if (largs->descs != NULL) {
lp = (struct user_segment_descriptor *)
kmem_alloc(kernel_map, largs->num *
@ -132,6 +133,7 @@ update_gdt_gsbase(struct thread *td, uint32_t base)
if (td != curthread)
return;
td->td_pcb->pcb_full_iret = 1;
critical_enter();
sd = PCPU_GET(gs32p);
sd->sd_lobase = base & 0xffffff;
@ -146,6 +148,7 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
if (td != curthread)
return;
td->td_pcb->pcb_full_iret = 1;
critical_enter();
sd = PCPU_GET(fs32p);
sd->sd_lobase = base & 0xffffff;
@ -201,6 +204,7 @@ sysarch(td, uap)
if (!error) {
pcb->pcb_fsbase = i386base;
td->td_frame->tf_fs = _ufssel;
pcb->pcb_full_iret = 1;
update_gdt_fsbase(td, i386base);
}
break;
@ -212,6 +216,7 @@ sysarch(td, uap)
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
pcb->pcb_gsbase = i386base;
pcb->pcb_full_iret = 1;
td->td_frame->tf_gs = _ugssel;
update_gdt_gsbase(td, i386base);
}
@ -225,6 +230,7 @@ sysarch(td, uap)
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
pcb->pcb_fsbase = a64base;
pcb->pcb_full_iret = 1;
td->td_frame->tf_fs = _ufssel;
} else
error = EINVAL;
@ -240,6 +246,7 @@ sysarch(td, uap)
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
pcb->pcb_gsbase = a64base;
pcb->pcb_full_iret = 1;
td->td_frame->tf_gs = _ugssel;
} else
error = EINVAL;
@ -525,6 +532,7 @@ amd64_set_ldt(td, uap, descs)
uap->start, uap->num, (void *)uap->descs);
#endif
td->td_pcb->pcb_full_iret = 1;
p = td->td_proc;
if (descs == NULL) {
/* Free descriptors */

View File

@ -186,6 +186,9 @@ cpu_fork(td1, p2, td2, flags)
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
/* New segment registers. */
pcb2->pcb_full_iret = 1;
/* Copy the LDT, if necessary. */
mdp1 = &td1->td_proc->p_md;
mdp2 = &p2->p_md;
@ -336,6 +339,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
*/
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
pcb2->pcb_flags &= ~PCB_FPUINITDONE;
pcb2->pcb_full_iret = 1;
/*
* Create a new fresh stack for the new thread.
@ -450,6 +454,7 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
}
#endif
td->td_pcb->pcb_fsbase = (register_t)tls_base;
td->td_pcb->pcb_full_iret = 1;
return (0);
}

View File

@ -42,10 +42,16 @@
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
swapgs
sti
pushq $2 /* sizeof "int 0x80" */
subq $TF_ERR,%rsp /* skip over tf_trapno */
movq %rdi,TF_RDI(%rsp)
movq PCPU(CURPCB),%rdi
movb $0,PCB_FULL_IRET(%rdi)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
sti
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
@ -60,10 +66,6 @@ IDTVEC(int0x80_syscall)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi

View File

@ -125,6 +125,7 @@ set_regs32(struct thread *td, struct reg32 *regs)
tp->tf_fs = regs->r_fs;
tp->tf_es = regs->r_es;
tp->tf_ds = regs->r_ds;
td->td_pcb->pcb_full_iret = 1;
tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = regs->r_edi;
tp->tf_rsi = regs->r_esi;

View File

@ -159,6 +159,7 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
ia32_get_fpcontext(td, mcp);
mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
td->td_pcb->pcb_full_iret = 1;
return (0);
}
@ -201,6 +202,7 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
tp->tf_rsp = mcp->mc_esp;
tp->tf_ss = mcp->mc_ss;
td->td_pcb->pcb_flags |= PCB_FULLCTX;
td->td_pcb->pcb_full_iret = 1;
return (0);
}
@ -394,6 +396,7 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_ss = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
td->td_pcb->pcb_full_iret = 1;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@ -514,6 +517,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_ss = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
td->td_pcb->pcb_full_iret = 1;
/* XXXKIB leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@ -611,6 +615,7 @@ freebsd4_freebsd32_sigreturn(td, uap)
SIG_CANTMASK(td->td_sigmask);
signotify(td);
PROC_UNLOCK(p);
td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
#endif /* COMPAT_FREEBSD4 */
@ -702,6 +707,7 @@ freebsd32_sigreturn(td, uap)
SIG_CANTMASK(td->td_sigmask);
signotify(td);
PROC_UNLOCK(p);
td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@ -747,5 +753,6 @@ ia32_setregs(td, entry, stack, ps_strings)
/* Return via doreti so that we can change to a different %cs */
pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
pcb->pcb_flags &= ~PCB_GS32BIT;
td->td_pcb->pcb_full_iret = 1;
td->td_retval[1] = 0;
}

View File

@ -72,12 +72,13 @@ struct pcb {
struct savefpu pcb_save;
uint16_t pcb_initial_fpucw;
caddr_t pcb_onfault; /* copyin/out fault recovery */
caddr_t pcb_onfault; /* copyin/out fault recovery */
/* 32-bit segment descriptor */
struct user_segment_descriptor pcb_gs32sd;
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;
char pcb_full_iret;
};
struct xpcb {

View File

@ -423,6 +423,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -545,6 +546,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@ -645,6 +647,7 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
regs->tf_rflags = eflags;
regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
regs->tf_ss = frame.sf_sc.sc_ss;
td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@ -746,6 +749,7 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
regs->tf_rflags = eflags;
regs->tf_rsp = context->sc_esp_at_signal;
regs->tf_ss = context->sc_ss;
td->td_pcb->pcb_full_iret = 1;
/*
* call sigaltstack & ignore results..
@ -864,6 +868,7 @@ exec_linux_setregs(td, entry, stack, ps_strings)
regs->tf_flags = TF_HASSEGS;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
td->td_pcb->pcb_full_iret = 1;
load_cr0(rcr0() | CR0_MP | CR0_TS);
fpstate_drop(td);