Restore the segment registers and segment base MSRs on the amd64 syscall
return path only when the thread was context switched while executing syscall code or the syscall explicitly modified the LDT or MSRs. Save the segment registers in trap handlers before interrupts are enabled, so that a context switch cannot happen before the registers are saved. Use a separate byte in the pcb to indicate fast/full return, since pcb_flags are not synchronized with context switches. The change restores the syscall microbenchmark numbers that had slowed down after the commit of LDT support on amd64. Reviewed by: jeff Tested (and tested, and tested ...) by: pho Approved by: re (kensmith)
This commit is contained in:
parent
2706e21f2c
commit
1596f53aae
@ -97,6 +97,7 @@ END(cpu_throw)
|
||||
ENTRY(cpu_switch)
|
||||
/* Switch to new thread. First, save context. */
|
||||
movq TD_PCB(%rdi),%r8
|
||||
movb $1,PCB_FULL_IRET(%r8)
|
||||
|
||||
movq (%rsp),%rax /* Hardware registers */
|
||||
movq %r15,PCB_R15(%r8)
|
||||
|
@ -162,19 +162,20 @@ IDTVEC(align)
|
||||
.globl alltraps
|
||||
.type alltraps,@function
|
||||
alltraps:
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz alltraps_testi /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
movb $0,PCB_FULL_IRET(%rdi)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
alltraps_testi:
|
||||
testl $PSL_I,TF_RFLAGS(%rsp)
|
||||
jz alltraps_pushregs
|
||||
jz alltraps_pushregs_no_rdi
|
||||
sti
|
||||
alltraps_pushregs:
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
alltraps_pushregs_no_rdi:
|
||||
movq %rsi,TF_RSI(%rsp)
|
||||
movq %rdx,TF_RDX(%rsp)
|
||||
@ -233,14 +234,17 @@ calltrap:
|
||||
.globl alltraps_noen
|
||||
.type alltraps_noen,@function
|
||||
alltraps_noen:
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* already running with kernel GS.base */
|
||||
swapgs
|
||||
movq PCPU(CURPCB),%rdi
|
||||
movb $0,PCB_FULL_IRET(%rdi)
|
||||
1: movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
jmp alltraps_pushregs
|
||||
jmp alltraps_pushregs_no_rdi
|
||||
|
||||
IDTVEC(dblfault)
|
||||
subq $TF_ERR,%rsp
|
||||
@ -278,12 +282,13 @@ IDTVEC(dblfault)
|
||||
IDTVEC(page)
|
||||
subq $TF_ERR,%rsp
|
||||
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
|
||||
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 1f /* already running with kernel GS.base */
|
||||
swapgs
|
||||
1:
|
||||
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
|
||||
movq %cr2,%rdi /* preserve %cr2 before .. */
|
||||
movq PCPU(CURPCB),%rdi
|
||||
movb $0,PCB_FULL_IRET(%rdi)
|
||||
1: movq %cr2,%rdi /* preserve %cr2 before .. */
|
||||
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
@ -311,7 +316,9 @@ IDTVEC(prot)
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
|
||||
jz 2f /* already running with kernel GS.base */
|
||||
1: swapgs
|
||||
2: movw %fs,TF_FS(%rsp)
|
||||
2: movq PCPU(CURPCB),%rdi
|
||||
movb $1,PCB_FULL_IRET(%rdi) /* always full iret from GPF */
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
@ -341,6 +348,8 @@ IDTVEC(fast_syscall)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
movq PCPU(CURPCB),%r11
|
||||
movb $0,PCB_FULL_IRET(%r11)
|
||||
sti
|
||||
movq $KUDSEL,TF_SS(%rsp)
|
||||
movq $KUCSEL,TF_CS(%rsp)
|
||||
@ -644,7 +653,8 @@ doreti_exit:
|
||||
*/
|
||||
testb $SEL_RPL_MASK,TF_CS(%rsp)
|
||||
jz ld_regs
|
||||
|
||||
cmpb $0,PCB_FULL_IRET(%r8)
|
||||
je ld_regs
|
||||
testl $TF_HASSEGS,TF_FLAGS(%rsp)
|
||||
je set_segs
|
||||
|
||||
|
@ -141,6 +141,7 @@ ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
|
||||
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
|
||||
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
|
||||
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
|
||||
ASSYM(PCB_FULL_IRET, offsetof(struct pcb, pcb_full_iret));
|
||||
ASSYM(PCB_DBREGS, PCB_DBREGS);
|
||||
ASSYM(PCB_32BIT, PCB_32BIT);
|
||||
ASSYM(PCB_GS32BIT, PCB_GS32BIT);
|
||||
|
@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
regs->tf_fs = _ufssel;
|
||||
regs->tf_gs = _ugssel;
|
||||
regs->tf_flags = TF_HASSEGS;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&psp->ps_mtx);
|
||||
}
|
||||
@ -483,6 +484,7 @@ sigreturn(td, uap)
|
||||
signotify(td);
|
||||
PROC_UNLOCK(p);
|
||||
td->td_pcb->pcb_flags |= PCB_FULLCTX;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (EJUSTRETURN);
|
||||
}
|
||||
|
||||
@ -853,6 +855,7 @@ exec_setregs(td, entry, stack, ps_strings)
|
||||
pcb->pcb_gsbase = 0;
|
||||
pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
|
||||
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
|
||||
pcb->pcb_full_iret = 1;
|
||||
|
||||
bzero((char *)regs, sizeof(struct trapframe));
|
||||
regs->tf_rip = entry;
|
||||
@ -2031,6 +2034,7 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
|
||||
td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
|
||||
}
|
||||
td->td_pcb->pcb_flags |= PCB_FULLCTX;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -103,6 +103,7 @@ sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
|
||||
error = amd64_get_ldt(td, largs);
|
||||
break;
|
||||
case I386_SET_LDT:
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
if (largs->descs != NULL) {
|
||||
lp = (struct user_segment_descriptor *)
|
||||
kmem_alloc(kernel_map, largs->num *
|
||||
@ -132,6 +133,7 @@ update_gdt_gsbase(struct thread *td, uint32_t base)
|
||||
|
||||
if (td != curthread)
|
||||
return;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
critical_enter();
|
||||
sd = PCPU_GET(gs32p);
|
||||
sd->sd_lobase = base & 0xffffff;
|
||||
@ -146,6 +148,7 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
|
||||
|
||||
if (td != curthread)
|
||||
return;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
critical_enter();
|
||||
sd = PCPU_GET(fs32p);
|
||||
sd->sd_lobase = base & 0xffffff;
|
||||
@ -201,6 +204,7 @@ sysarch(td, uap)
|
||||
if (!error) {
|
||||
pcb->pcb_fsbase = i386base;
|
||||
td->td_frame->tf_fs = _ufssel;
|
||||
pcb->pcb_full_iret = 1;
|
||||
update_gdt_fsbase(td, i386base);
|
||||
}
|
||||
break;
|
||||
@ -212,6 +216,7 @@ sysarch(td, uap)
|
||||
error = copyin(uap->parms, &i386base, sizeof(i386base));
|
||||
if (!error) {
|
||||
pcb->pcb_gsbase = i386base;
|
||||
pcb->pcb_full_iret = 1;
|
||||
td->td_frame->tf_gs = _ugssel;
|
||||
update_gdt_gsbase(td, i386base);
|
||||
}
|
||||
@ -225,6 +230,7 @@ sysarch(td, uap)
|
||||
if (!error) {
|
||||
if (a64base < VM_MAXUSER_ADDRESS) {
|
||||
pcb->pcb_fsbase = a64base;
|
||||
pcb->pcb_full_iret = 1;
|
||||
td->td_frame->tf_fs = _ufssel;
|
||||
} else
|
||||
error = EINVAL;
|
||||
@ -240,6 +246,7 @@ sysarch(td, uap)
|
||||
if (!error) {
|
||||
if (a64base < VM_MAXUSER_ADDRESS) {
|
||||
pcb->pcb_gsbase = a64base;
|
||||
pcb->pcb_full_iret = 1;
|
||||
td->td_frame->tf_gs = _ugssel;
|
||||
} else
|
||||
error = EINVAL;
|
||||
@ -525,6 +532,7 @@ amd64_set_ldt(td, uap, descs)
|
||||
uap->start, uap->num, (void *)uap->descs);
|
||||
#endif
|
||||
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
p = td->td_proc;
|
||||
if (descs == NULL) {
|
||||
/* Free descriptors */
|
||||
|
@ -186,6 +186,9 @@ cpu_fork(td1, p2, td2, flags)
|
||||
/* As an i386, do not copy io permission bitmap. */
|
||||
pcb2->pcb_tssp = NULL;
|
||||
|
||||
/* New segment registers. */
|
||||
pcb2->pcb_full_iret = 1;
|
||||
|
||||
/* Copy the LDT, if necessary. */
|
||||
mdp1 = &td1->td_proc->p_md;
|
||||
mdp2 = &p2->p_md;
|
||||
@ -336,6 +339,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
|
||||
*/
|
||||
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
|
||||
pcb2->pcb_flags &= ~PCB_FPUINITDONE;
|
||||
pcb2->pcb_full_iret = 1;
|
||||
|
||||
/*
|
||||
* Create a new fresh stack for the new thread.
|
||||
@ -450,6 +454,7 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
|
||||
}
|
||||
#endif
|
||||
td->td_pcb->pcb_fsbase = (register_t)tls_base;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -42,10 +42,16 @@
|
||||
SUPERALIGN_TEXT
|
||||
IDTVEC(int0x80_syscall)
|
||||
swapgs
|
||||
sti
|
||||
pushq $2 /* sizeof "int 0x80" */
|
||||
subq $TF_ERR,%rsp /* skip over tf_trapno */
|
||||
movq %rdi,TF_RDI(%rsp)
|
||||
movq PCPU(CURPCB),%rdi
|
||||
movb $0,PCB_FULL_IRET(%rdi)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
sti
|
||||
movq %rsi,TF_RSI(%rsp)
|
||||
movq %rdx,TF_RDX(%rsp)
|
||||
movq %rcx,TF_RCX(%rsp)
|
||||
@ -60,10 +66,6 @@ IDTVEC(int0x80_syscall)
|
||||
movq %r13,TF_R13(%rsp)
|
||||
movq %r14,TF_R14(%rsp)
|
||||
movq %r15,TF_R15(%rsp)
|
||||
movw %fs,TF_FS(%rsp)
|
||||
movw %gs,TF_GS(%rsp)
|
||||
movw %es,TF_ES(%rsp)
|
||||
movw %ds,TF_DS(%rsp)
|
||||
movl $TF_HASSEGS,TF_FLAGS(%rsp)
|
||||
FAKE_MCOUNT(TF_RIP(%rsp))
|
||||
movq %rsp, %rdi
|
||||
|
@ -125,6 +125,7 @@ set_regs32(struct thread *td, struct reg32 *regs)
|
||||
tp->tf_fs = regs->r_fs;
|
||||
tp->tf_es = regs->r_es;
|
||||
tp->tf_ds = regs->r_ds;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
tp->tf_flags = TF_HASSEGS;
|
||||
tp->tf_rdi = regs->r_edi;
|
||||
tp->tf_rsi = regs->r_esi;
|
||||
|
@ -159,6 +159,7 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
|
||||
ia32_get_fpcontext(td, mcp);
|
||||
mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
|
||||
mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -201,6 +202,7 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
|
||||
tp->tf_rsp = mcp->mc_esp;
|
||||
tp->tf_ss = mcp->mc_ss;
|
||||
td->td_pcb->pcb_flags |= PCB_FULLCTX;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -394,6 +396,7 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
regs->tf_ss = _udatasel;
|
||||
regs->tf_ds = _udatasel;
|
||||
regs->tf_es = _udatasel;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
/* leave user %fs and %gs untouched */
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&psp->ps_mtx);
|
||||
@ -514,6 +517,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
regs->tf_ss = _udatasel;
|
||||
regs->tf_ds = _udatasel;
|
||||
regs->tf_es = _udatasel;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
/* XXXKIB leave user %fs and %gs untouched */
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&psp->ps_mtx);
|
||||
@ -611,6 +615,7 @@ freebsd4_freebsd32_sigreturn(td, uap)
|
||||
SIG_CANTMASK(td->td_sigmask);
|
||||
signotify(td);
|
||||
PROC_UNLOCK(p);
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (EJUSTRETURN);
|
||||
}
|
||||
#endif /* COMPAT_FREEBSD4 */
|
||||
@ -702,6 +707,7 @@ freebsd32_sigreturn(td, uap)
|
||||
SIG_CANTMASK(td->td_sigmask);
|
||||
signotify(td);
|
||||
PROC_UNLOCK(p);
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
return (EJUSTRETURN);
|
||||
}
|
||||
|
||||
@ -747,5 +753,6 @@ ia32_setregs(td, entry, stack, ps_strings)
|
||||
/* Return via doreti so that we can change to a different %cs */
|
||||
pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
|
||||
pcb->pcb_flags &= ~PCB_GS32BIT;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
td->td_retval[1] = 0;
|
||||
}
|
||||
|
@ -72,12 +72,13 @@ struct pcb {
|
||||
struct savefpu pcb_save;
|
||||
uint16_t pcb_initial_fpucw;
|
||||
|
||||
caddr_t pcb_onfault; /* copyin/out fault recovery */
|
||||
caddr_t pcb_onfault; /* copyin/out fault recovery */
|
||||
|
||||
/* 32-bit segment descriptor */
|
||||
struct user_segment_descriptor pcb_gs32sd;
|
||||
/* local tss, with i/o bitmap; NULL for common */
|
||||
struct amd64tss *pcb_tssp;
|
||||
char pcb_full_iret;
|
||||
};
|
||||
|
||||
struct xpcb {
|
||||
|
@ -423,6 +423,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
regs->tf_fs = _ufssel;
|
||||
regs->tf_gs = _ugssel;
|
||||
regs->tf_flags = TF_HASSEGS;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&psp->ps_mtx);
|
||||
}
|
||||
@ -545,6 +546,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
|
||||
regs->tf_fs = _ufssel;
|
||||
regs->tf_gs = _ugssel;
|
||||
regs->tf_flags = TF_HASSEGS;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
PROC_LOCK(p);
|
||||
mtx_lock(&psp->ps_mtx);
|
||||
}
|
||||
@ -645,6 +647,7 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
|
||||
regs->tf_rflags = eflags;
|
||||
regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
|
||||
regs->tf_ss = frame.sf_sc.sc_ss;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
|
||||
return (EJUSTRETURN);
|
||||
}
|
||||
@ -746,6 +749,7 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
|
||||
regs->tf_rflags = eflags;
|
||||
regs->tf_rsp = context->sc_esp_at_signal;
|
||||
regs->tf_ss = context->sc_ss;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
|
||||
/*
|
||||
* call sigaltstack & ignore results..
|
||||
@ -864,6 +868,7 @@ exec_linux_setregs(td, entry, stack, ps_strings)
|
||||
regs->tf_flags = TF_HASSEGS;
|
||||
regs->tf_cs = _ucode32sel;
|
||||
regs->tf_rbx = ps_strings;
|
||||
td->td_pcb->pcb_full_iret = 1;
|
||||
load_cr0(rcr0() | CR0_MP | CR0_TS);
|
||||
fpstate_drop(td);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user