Introduce the x86 kernel interfaces to allow kernel code to use

FPU/SSE hardware. The caller should provide a save area, which is chained
into a stack of save areas; the pcb save area for usermode FPU state is
on top. The pcb now contains a pointer to the current FPU save area,
which is used during FPUDNA handling and context switches.  There is also
a facility to allow a kernel thread to use the pcb save area.

Change the dreaded "npxdna in kernel mode!" warnings into panics
when the FPU usage is not registered.

KPI discussed with:	fabient
Tested by:    pho, fabient
Hardware provided by:	Sentex Communications
MFC after:    1 month
This commit is contained in:
Konstantin Belousov 2010-06-05 15:59:59 +00:00
parent 7ba8f7307a
commit 6cf9a08d2c
20 changed files with 441 additions and 85 deletions

View File

@ -245,7 +245,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
cr3 = rcr3();
load_cr3(KPML4phys);
stopfpu = &stopxpcbs[0]->xpcb_pcb.pcb_save;
stopfpu = stopxpcbs[0]->xpcb_pcb.pcb_save;
if (acpi_savecpu(stopxpcbs[0])) {
fpugetregs(curthread, stopfpu);

View File

@ -116,7 +116,7 @@ done_store_dr:
/* have we used fp, and need a save? */
cmpq %rdi,PCPU(FPCURTHREAD)
jne 1f
addq $PCB_SAVEFPU,%r8
movq PCB_SAVEFPU(%r8),%r8
clts
fxsave (%r8)
smsw %ax
@ -341,7 +341,7 @@ ENTRY(savectx)
je 1f
movq TD_PCB(%rax),%rdi
leaq PCB_SAVEFPU(%rdi),%rdi
movq PCB_SAVEFPU(%rdi),%rdi
clts
fxsave (%rdi)
smsw %ax
@ -349,7 +349,7 @@ ENTRY(savectx)
lmsw %ax
movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */
leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */
movq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */
/* arg 1 (%rdi) already loaded */
call bcopy
1:

View File

@ -91,8 +91,8 @@ void stop_emulating(void);
#endif /* __GNUCLIKE_ASM && !lint */
#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw)
#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw)
#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw)
#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw)
typedef u_char bool_t;
@ -146,7 +146,7 @@ fpuexit(struct thread *td)
savecrit = intr_disable();
if (curthread == PCPU_GET(fpcurthread)) {
stop_emulating();
fxsave(&PCPU_GET(curpcb)->pcb_save);
fxsave(PCPU_GET(curpcb)->pcb_save);
start_emulating();
PCPU_SET(fpcurthread, 0);
}
@ -424,8 +424,10 @@ fpudna(void)
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
fldcw(&pcb->pcb_initial_fpucw);
pcb->pcb_flags |= PCB_FPUINITDONE;
if (PCB_USER_FPU(pcb))
pcb->pcb_flags |= PCB_USERFPUINITDONE;
} else
fxrstor(&pcb->pcb_save);
fxrstor(pcb->pcb_save);
intr_restore(s);
}
@ -448,14 +450,40 @@ fpudrop()
* Get the state of the FPU without dropping ownership (if possible).
* It returns the FPU ownership status.
*/
/*
 * Copy the usermode FPU state of thread td into *addr.
 *
 * Returns _MC_FPOWNED_NONE when PCB_USERFPUINITDONE is clear (the
 * initial state image with the thread's initial control word is
 * returned), _MC_FPOWNED_FPU when the state was stored with fxsave()
 * because td is fpcurthread and the pcb is in user-FPU mode, and
 * _MC_FPOWNED_PCB when it was copied from pcb_user_save.
 */
int
fpugetuserregs(struct thread *td, struct savefpu *addr)
{
	struct pcb *tdpcb;
	register_t saved_intr;
	int live;

	tdpcb = td->td_pcb;

	/* No user state yet: return the pristine image. */
	if ((tdpcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
		bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate));
		addr->sv_env.en_cw = tdpcb->pcb_initial_fpucw;
		return (_MC_FPOWNED_NONE);
	}

	/* The ownership test and the fxsave() run with interrupts off. */
	saved_intr = intr_disable();
	live = (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(tdpcb));
	if (live)
		fxsave(addr);
	intr_restore(saved_intr);

	if (live)
		return (_MC_FPOWNED_FPU);

	bcopy(&tdpcb->pcb_user_save, addr, sizeof(*addr));
	return (_MC_FPOWNED_PCB);
}
int
fpugetregs(struct thread *td, struct savefpu *addr)
{
register_t s;
struct pcb *pcb;
if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate));
addr->sv_env.en_cw = td->td_pcb->pcb_initial_fpucw;
addr->sv_env.en_cw = pcb->pcb_initial_fpucw;
return (_MC_FPOWNED_NONE);
}
s = intr_disable();
@ -465,7 +493,7 @@ fpugetregs(struct thread *td, struct savefpu *addr)
return (_MC_FPOWNED_FPU);
} else {
intr_restore(s);
bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr));
bcopy(pcb->pcb_save, addr, sizeof(*addr));
return (_MC_FPOWNED_PCB);
}
}
@ -473,20 +501,45 @@ fpugetregs(struct thread *td, struct savefpu *addr)
/*
* Set the state of the FPU.
*/
/*
 * Install *addr as the usermode FPU state of thread td.
 *
 * When td is fpcurthread and the pcb is in user-FPU mode the state is
 * loaded with fxrstor(); otherwise it is copied into pcb_user_save.
 * PCB_USERFPUINITDONE is always set; PCB_FPUINITDONE is set only while
 * the pcb is in user-FPU mode.
 */
void
fpusetuserregs(struct thread *td, struct savefpu *addr)
{
	struct pcb *tdpcb;
	register_t saved_intr;
	int live;

	tdpcb = td->td_pcb;

	/* The ownership test and the fxrstor() run with interrupts off. */
	saved_intr = intr_disable();
	live = (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(tdpcb));
	if (live)
		fxrstor(addr);
	intr_restore(saved_intr);

	if (live) {
		tdpcb->pcb_flags |= PCB_FPUINITDONE | PCB_USERFPUINITDONE;
		return;
	}

	bcopy(addr, &tdpcb->pcb_user_save, sizeof(*addr));
	if (PCB_USER_FPU(tdpcb))
		tdpcb->pcb_flags |= PCB_FPUINITDONE;
	tdpcb->pcb_flags |= PCB_USERFPUINITDONE;
}
/*
 * Install *addr as the FPU state for the save area currently selected
 * by td's pcb.
 *
 * When td is fpcurthread the state is loaded with fxrstor(); otherwise
 * it is copied into the area that pcb_save points to.  PCB_FPUINITDONE
 * is always set on td's pcb, and PCB_USERFPUINITDONE is set as well
 * while the pcb is in user-FPU mode.
 */
void
fpusetregs(struct thread *td, struct savefpu *addr)
{
	register_t s;
	struct pcb *pcb;

	pcb = td->td_pcb;
	s = intr_disable();
	if (td == PCPU_GET(fpcurthread)) {
		fxrstor(addr);
		intr_restore(s);
	} else {
		intr_restore(s);
		/*
		 * pcb_save is a pointer to the active save area, so copy
		 * through it; copying to &pcb->pcb_save would overwrite
		 * the pointer itself and the pcb fields that follow it.
		 */
		bcopy(addr, pcb->pcb_save, sizeof(*addr));
	}
	/* Update the flags of td's pcb, not those of curthread. */
	if (PCB_USER_FPU(pcb))
		pcb->pcb_flags |= PCB_USERFPUINITDONE;
	pcb->pcb_flags |= PCB_FPUINITDONE;
}
/*
@ -575,3 +628,74 @@ static devclass_t fpupnp_devclass;
DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0);
#endif /* DEV_ISA */
/*
 * Begin a region of kernel FPU use for thread td, with ctx supplying
 * the save area.
 *
 * Records in ctx->flags whether PCB_FPUINITDONE was set, calls
 * fpuexit(td), remembers the previous pcb_save in ctx->prev and makes
 * ctx->hwstate the current save area.  On return PCB_KERNFPU is set
 * and PCB_FPUINITDONE is clear.  The flags argument is not examined.
 * Always returns 0.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *tdpcb;

	tdpcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(tdpcb) ||
	    tdpcb->pcb_save == &tdpcb->pcb_user_save, ("mangled pcb_save"));

	/* Remember the init state so fpu_kern_leave() can reinstate it. */
	ctx->flags = (tdpcb->pcb_flags & PCB_FPUINITDONE) != 0 ?
	    FPU_KERN_CTX_FPUINITDONE : 0;

	fpuexit(td);

	/* Chain ctx in front of the save area that was current so far. */
	ctx->prev = tdpcb->pcb_save;
	tdpcb->pcb_save = &ctx->hwstate;
	tdpcb->pcb_flags |= PCB_KERNFPU;
	tdpcb->pcb_flags &= ~PCB_FPUINITDONE;
	return (0);
}
/*
 * End a region of kernel FPU use that was started with ctx.
 *
 * Drops the FPU if curthread is fpcurthread, makes ctx->prev the
 * current save area again and recomputes PCB_FPUINITDONE: from
 * PCB_USERFPUINITDONE when the restored area is pcb_user_save (in which
 * case PCB_KERNFPU is cleared too), otherwise from the bit recorded in
 * ctx->flags.  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *tdpcb;
	register_t intr_state;
	int back_to_user, initdone;

	tdpcb = td->td_pcb;

	intr_state = intr_disable();
	if (curthread == PCPU_GET(fpcurthread))
		fpudrop();
	intr_restore(intr_state);

	/* Unchain ctx and work out which init state applies now. */
	tdpcb->pcb_save = ctx->prev;
	back_to_user = (tdpcb->pcb_save == &tdpcb->pcb_user_save);
	if (back_to_user)
		initdone = (tdpcb->pcb_flags & PCB_USERFPUINITDONE) != 0;
	else
		initdone = (ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0;

	if (initdone)
		tdpcb->pcb_flags |= PCB_FPUINITDONE;
	else
		tdpcb->pcb_flags &= ~PCB_FPUINITDONE;

	if (back_to_user) {
		tdpcb->pcb_flags &= ~PCB_KERNFPU;
	} else {
		/* Still inside an outer kernel FPU region. */
		KASSERT(!PCB_USER_FPU(tdpcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}
/*
 * Mark the current kernel thread as a kernel FPU user by setting
 * PCB_KERNFPU in its pcb.
 *
 * Asserts that the caller is a kthread, that pcb_save still points at
 * pcb_user_save and that PCB_KERNFPU is not already set.  The flags
 * argument is not examined.  Always returns 0.
 */
int
fpu_kern_thread(u_int flags)
{
	struct pcb *curp = PCPU_GET(curpcb);

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(curp->pcb_save == &curp->pcb_user_save,
	    ("mangled pcb_save"));
	KASSERT(PCB_USER_FPU(curp), ("recursive call"));

	curp->pcb_flags |= PCB_KERNFPU;
	return (0);
}
/*
 * Return non-zero when curthread is a kthread whose pcb has
 * PCB_KERNFPU set, and 0 otherwise.  The flags argument is not
 * examined.
 */
int
is_fpu_kern_thread(u_int flags)
{

	return ((curthread->td_pflags & TDP_KTHREAD) != 0 &&
	    (PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0);
}

View File

@ -1960,7 +1960,7 @@ int
fill_fpregs(struct thread *td, struct fpreg *fpregs)
{
fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs);
fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs);
return (0);
}
@ -1969,7 +1969,7 @@ int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{
set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save);
set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save);
return (0);
}
@ -2084,7 +2084,8 @@ static void
get_fpcontext(struct thread *td, mcontext_t *mcp)
{
mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate);
mcp->mc_ownedfp = fpugetuserregs(td,
(struct savefpu *)&mcp->mc_fpstate);
mcp->mc_fpformat = fpuformat();
}
@ -2109,7 +2110,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp)
*/
fpstate = (struct savefpu *)&mcp->mc_fpstate;
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
fpusetregs(td, fpstate);
fpusetuserregs(td, fpstate);
} else
return (EINVAL);
return (0);
@ -2120,6 +2121,7 @@ fpstate_drop(struct thread *td)
{
register_t s;
KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
s = intr_disable();
if (PCPU_GET(fpcurthread) == td)
fpudrop();
@ -2133,7 +2135,8 @@ fpstate_drop(struct thread *td)
* sendsig() is the only caller of fpugetregs()... perhaps we just
* have too many layers.
*/
curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE;
curthread->td_pcb->pcb_flags &= ~(PCB_FPUINITDONE |
PCB_USERFPUINITDONE);
intr_restore(s);
}

View File

@ -1247,7 +1247,7 @@ cpususpend_handler(void)
rf = intr_disable();
cr3 = rcr3();
stopfpu = &stopxpcbs[cpu]->xpcb_pcb.pcb_save;
stopfpu = stopxpcbs[cpu]->xpcb_pcb.pcb_save;
if (savectx2(stopxpcbs[cpu])) {
fpugetregs(curthread, stopfpu);
wbinvd();

View File

@ -425,6 +425,8 @@ trap(struct trapframe *frame)
case T_DNA:
/* transparent fault (due to context switch "late") */
KASSERT(PCB_USER_FPU(td->td_pcb),
("kernel FPU ctx has leaked"));
fpudna();
goto userout;
@ -449,18 +451,21 @@ trap(struct trapframe *frame)
goto out;
case T_DNA:
/*
* The kernel is apparently using fpu for copying.
* XXX this should be fatal unless the kernel has
* registered such use.
*/
printf("fpudna in kernel mode!\n");
#ifdef KDB
kdb_backtrace();
#endif
KASSERT(!PCB_USER_FPU(td->td_pcb),
("Unregistered use of FPU in kernel"));
fpudna();
goto out;
case T_ARITHTRAP: /* arithmetic trap */
case T_XMMFLT: /* SIMD floating-point exception */
case T_FPOPFLT: /* FPU operand fetch fault */
/*
* XXXKIB for now disable any FPU traps in kernel
* handler registration seems to be overkill
*/
trap_fatal(frame, 0);
goto out;
case T_STKFLT: /* stack fault */
break;
@ -603,6 +608,8 @@ trap(struct trapframe *frame)
user:
userret(td, frame);
mtx_assert(&Giant, MA_NOTOWNED);
KASSERT(PCB_USER_FPU(td->td_pcb),
("Return from trap with kernel FPU ctx leaked"));
userout:
out:
return;
@ -891,5 +898,12 @@ syscall(struct trapframe *frame)
trapsignal(td, &ksi);
}
/* A system call must not return while the pcb is still in kernel-FPU mode. */
KASSERT(PCB_USER_FPU(td->td_pcb),
    ("System call %s returning with kernel FPU ctx leaked",
    syscallname(td->td_proc, sa.code)));
KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, sa.code)));
syscallret(td, error, &sa);
}

View File

@ -122,7 +122,7 @@ cpu_fork(td1, p2, td2, flags)
return;
}
/* Ensure that p1's pcb is up to date. */
/* Ensure that td1's pcb is up to date. */
fpuexit(td1);
/* Point the pcb to the top of the stack */
@ -130,9 +130,12 @@ cpu_fork(td1, p2, td2, flags)
td2->td_kstack_pages * PAGE_SIZE) - 1;
td2->td_pcb = pcb2;
/* Copy p1's pcb */
/* Copy td1's pcb */
bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
/* Properly initialize pcb_save */
pcb2->pcb_save = &pcb2->pcb_user_save;
/* Point mdproc and then copy over td1's contents */
mdp2 = &p2->p_md;
bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
@ -308,6 +311,7 @@ cpu_thread_alloc(struct thread *td)
td->td_pcb = (struct pcb *)(td->td_kstack +
td->td_kstack_pages * PAGE_SIZE) - 1;
td->td_frame = (struct trapframe *)td->td_pcb - 1;
td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
}
void
@ -381,7 +385,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
* values here.
*/
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
pcb2->pcb_flags &= ~PCB_FPUINITDONE;
pcb2->pcb_flags &= ~(PCB_FPUINITDONE | PCB_USERFPUINITDONE);
pcb2->pcb_save = &pcb2->pcb_user_save;
pcb2->pcb_full_iret = 1;
/*

View File

@ -147,7 +147,7 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs)
{
struct save87 *sv_87 = (struct save87 *)regs;
struct env87 *penv_87 = &sv_87->sv_env;
struct savefpu *sv_fpu = &td->td_pcb->pcb_save;
struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save;
struct envxmm *penv_xmm = &sv_fpu->sv_env;
int i;
@ -182,7 +182,7 @@ set_fpregs32(struct thread *td, struct fpreg32 *regs)
{
struct save87 *sv_87 = (struct save87 *)regs;
struct env87 *penv_87 = &sv_87->sv_env;
struct savefpu *sv_fpu = &td->td_pcb->pcb_save;
struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save;
struct envxmm *penv_xmm = &sv_fpu->sv_env;
int i;

View File

@ -73,6 +73,17 @@ struct savefpu {
u_char sv_pad[96];
} __aligned(16);
#ifdef _KERNEL
/*
 * Context for one kernel use of the FPU; filled in by fpu_kern_enter()
 * and consumed by fpu_kern_leave().
 */
struct fpu_kern_ctx {
/* save area that fpu_kern_enter() installs as the pcb's pcb_save */
struct savefpu hwstate;
/* pcb_save in effect before fpu_kern_enter(); put back by fpu_kern_leave() */
struct savefpu *prev;
/* FPU_KERN_CTX_* bits recorded by fpu_kern_enter() */
uint32_t flags;
};
#define FPU_KERN_CTX_FPUINITDONE 0x01
#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0)
#endif
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@ -102,9 +113,22 @@ void fpudrop(void);
void fpuexit(struct thread *td);
int fpuformat(void);
int fpugetregs(struct thread *td, struct savefpu *addr);
int fpugetuserregs(struct thread *td, struct savefpu *addr);
void fpuinit(void);
void fpusetregs(struct thread *td, struct savefpu *addr);
void fpusetuserregs(struct thread *td, struct savefpu *addr);
int fputrap(void);
int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx,
u_int flags);
int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
int fpu_kern_thread(u_int flags);
int is_fpu_kern_thread(u_int flags);
/*
* Flags for fpu_kern_enter() and fpu_kern_thread().
*/
#define FPU_KERN_NORMAL 0x0000
#endif
#endif /* !_MACHINE_FPU_H_ */

View File

@ -57,7 +57,9 @@ struct pcb {
register_t pcb_gsbase;
u_long pcb_flags;
#define PCB_DBREGS 0x02 /* process using debug registers */
#define PCB_KERNFPU 0x04 /* kernel uses fpu */
#define PCB_FPUINITDONE 0x08 /* fpu state is initialized */
#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */
#define PCB_GS32BIT 0x20 /* linux gs switch */
#define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */
#define PCB_FULLCTX 0x80 /* full context restore on sysret */
@ -69,7 +71,7 @@ struct pcb {
u_int64_t pcb_dr6;
u_int64_t pcb_dr7;
struct savefpu pcb_save;
struct savefpu pcb_user_save;
uint16_t pcb_initial_fpucw;
caddr_t pcb_onfault; /* copyin/out fault recovery */
@ -78,6 +80,7 @@ struct pcb {
struct user_segment_descriptor pcb_gs32sd;
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;
struct savefpu *pcb_save;
char pcb_full_iret;
};

View File

@ -3185,12 +3185,12 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs)
{
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm,
(struct save87 *)fpregs);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
@ -3200,11 +3200,11 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
set_fpregs_xmm((struct save87 *)fpregs,
&td->td_pcb->pcb_save.sv_xmm);
&td->td_pcb->pcb_user_save.sv_xmm);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof *fpregs);
return (0);
}
@ -3331,7 +3331,7 @@ get_fpcontext(struct thread *td, mcontext_t *mcp)
addr = (void *)((char *)addr + 4);
while ((uintptr_t)(void *)addr & 0xF);
}
mcp->mc_ownedfp = npxgetregs(td, addr);
mcp->mc_ownedfp = npxgetuserregs(td, addr);
if (addr != (union savefpu *)&mcp->mc_fpstate) {
bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2));
@ -3376,7 +3376,7 @@ set_fpcontext(struct thread *td, const mcontext_t *mcp)
* XXX we violate the dubious requirement that npxsetregs()
* be called with interrupts disabled.
*/
npxsetregs(td, addr);
npxsetuserregs(td, addr);
#endif
/*
* Don't bother putting things back where they were in the
@ -3393,6 +3393,7 @@ fpstate_drop(struct thread *td)
{
register_t s;
KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
s = intr_disable();
#ifdef DEV_NPX
if (PCPU_GET(fpcurthread) == td)
@ -3408,7 +3409,8 @@ fpstate_drop(struct thread *td)
* sendsig() is the only caller of npxgetregs()... perhaps we just
* have too many layers.
*/
curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE |
PCB_NPXUSERINITDONE);
intr_restore(s);
}

View File

@ -51,7 +51,7 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data)
if (!cpu_fxsr)
return (EINVAL);
fpstate = &td->td_pcb->pcb_save.sv_xmm;
fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
switch (req) {
case PT_GETXMMREGS:
error = copyout(fpstate, addr, sizeof(*fpstate));

View File

@ -156,8 +156,7 @@ ENTRY(cpu_switch)
/* have we used fp, and need a save? */
cmpl %ecx,PCPU(FPCURTHREAD)
jne 1f
addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */
pushl %edx
pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */
call npxsave /* do it in a big C function */
popl %eax
1:
@ -408,7 +407,7 @@ ENTRY(savectx)
pushl %ecx
movl TD_PCB(%eax),%eax
leal PCB_SAVEFPU(%eax),%eax
movl PCB_SAVEFPU(%eax),%eax
pushl %eax
pushl %eax
call npxsave
@ -417,7 +416,7 @@ ENTRY(savectx)
popl %ecx
pushl $PCB_SAVEFPU_SIZE
leal PCB_SAVEFPU(%ecx),%ecx
movl PCB_SAVEFPU(%ecx),%ecx
pushl %ecx
pushl %eax
call bcopy

View File

@ -501,6 +501,8 @@ trap(struct trapframe *frame)
case T_DNA:
#ifdef DEV_NPX
KASSERT(PCB_USER_FPU(td->td_pcb),
("kernel FPU ctx has leaked"));
/* transparent fault (due to context switch "late") */
if (npxdna())
goto userout;
@ -533,20 +535,23 @@ trap(struct trapframe *frame)
case T_DNA:
#ifdef DEV_NPX
/*
* The kernel is apparently using npx for copying.
* XXX this should be fatal unless the kernel has
* registered such use.
*/
printf("npxdna in kernel mode!\n");
#ifdef KDB
kdb_backtrace();
#endif
KASSERT(!PCB_USER_FPU(td->td_pcb),
("Unregistered use of FPU in kernel"));
if (npxdna())
goto out;
#endif
break;
case T_ARITHTRAP: /* arithmetic trap */
case T_XMMFLT: /* SIMD floating-point exception */
case T_FPOPFLT: /* FPU operand fetch fault */
/*
* XXXKIB for now disable any FPU traps in kernel
* handler registration seems to be overkill
*/
trap_fatal(frame, 0);
goto out;
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@ -752,6 +757,8 @@ trap(struct trapframe *frame)
user:
userret(td, frame);
mtx_assert(&Giant, MA_NOTOWNED);
KASSERT(PCB_USER_FPU(td->td_pcb),
("Return from trap with kernel FPU ctx leaked"));
userout:
out:
return;
@ -1064,5 +1071,12 @@ syscall(struct trapframe *frame)
trapsignal(td, &ksi);
}
KASSERT(PCB_USER_FPU(td->td_pcb),
("System call %s returning with kernel FPU ctx leaked",
syscallname(td->td_proc, sa.code)));
KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, sa.code)));
syscallret(td, error, &sa);
}

View File

@ -176,13 +176,13 @@ cpu_fork(td1, p2, td2, flags)
return;
}
/* Ensure that p1's pcb is up to date. */
/* Ensure that td1's pcb is up to date. */
if (td1 == curthread)
td1->td_pcb->pcb_gs = rgs();
#ifdef DEV_NPX
savecrit = intr_disable();
if (PCPU_GET(fpcurthread) == td1)
npxsave(&td1->td_pcb->pcb_save);
npxsave(td1->td_pcb->pcb_save);
intr_restore(savecrit);
#endif
@ -191,9 +191,12 @@ cpu_fork(td1, p2, td2, flags)
td2->td_kstack_pages * PAGE_SIZE) - 1;
td2->td_pcb = pcb2;
/* Copy p1's pcb */
/* Copy td1's pcb */
bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
/* Properly initialize pcb_save */
pcb2->pcb_save = &pcb2->pcb_user_save;
/* Point mdproc and then copy over td1's contents */
mdp2 = &p2->p_md;
bcopy(&p1->p_md, mdp2, sizeof(*mdp2));
@ -372,6 +375,7 @@ cpu_thread_alloc(struct thread *td)
td->td_kstack_pages * PAGE_SIZE) - 1;
td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
td->td_pcb->pcb_ext = NULL;
td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
}
void
@ -437,7 +441,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
* values here.
*/
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE);
pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE|PCB_NPXUSERINITDONE);
pcb2->pcb_save = &pcb2->pcb_user_save;
/*
* Create a new fresh stack for the new thread.

View File

@ -143,6 +143,15 @@ union savefpu {
#define IRQ_NPX 13
/*
 * Context for one kernel use of the FPU; filled in by fpu_kern_enter()
 * and consumed by fpu_kern_leave().
 */
struct fpu_kern_ctx {
/* save area that fpu_kern_enter() installs as the pcb's pcb_save */
union savefpu hwstate;
/* pcb_save in effect before fpu_kern_enter(); put back by fpu_kern_leave() */
union savefpu *prev;
/* FPU_KERN_CTX_* bits recorded by fpu_kern_enter() */
uint32_t flags;
};
#define FPU_KERN_CTX_NPXINITDONE 0x01
#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0)
/* full reset on some systems, NOP on others */
#define npx_full_reset() outb(IO_NPX + 1, 0)
@ -151,10 +160,22 @@ void npxdrop(void);
void npxexit(struct thread *td);
int npxformat(void);
int npxgetregs(struct thread *td, union savefpu *addr);
int npxgetuserregs(struct thread *td, union savefpu *addr);
void npxinit(void);
void npxsave(union savefpu *addr);
void npxsetregs(struct thread *td, union savefpu *addr);
void npxsetuserregs(struct thread *td, union savefpu *addr);
int npxtrap(void);
int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx,
u_int flags);
int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx);
int fpu_kern_thread(u_int flags);
int is_fpu_kern_thread(u_int flags);
/*
* Flags for fpu_kern_enter() and fpu_kern_thread().
*/
#define FPU_KERN_NORMAL 0x0000
#endif

View File

@ -60,7 +60,7 @@ struct pcb {
int pcb_dr6;
int pcb_dr7;
union savefpu pcb_save;
union savefpu pcb_user_save;
uint16_t pcb_initial_npxcw;
u_int pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
@ -68,6 +68,8 @@ struct pcb {
#define PCB_NPXTRAP 0x04 /* npx trap pending */
#define PCB_NPXINITDONE 0x08 /* fpu state is initialized */
#define PCB_VM86CALL 0x10 /* in vm86 call */
#define PCB_NPXUSERINITDONE 0x20 /* user fpu state is initialized */
#define PCB_KERNNPX 0x40 /* kernel uses npx */
caddr_t pcb_onfault; /* copyin/out fault recovery */
int pcb_gs;
@ -76,6 +78,7 @@ struct pcb {
struct pcb_ext *pcb_ext; /* optional pcb extension */
int pcb_psl; /* process status long */
u_long pcb_vm86[2]; /* vm86bios scratch space */
union savefpu *pcb_save;
};
#ifdef _KERNEL

View File

@ -135,12 +135,12 @@ void stop_emulating(void);
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
(cpu_fxsr ? \
(thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \
(thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw)
(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
(thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define GET_FPU_SW(thread) \
(cpu_fxsr ? \
(thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \
(thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw)
(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
(thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define SET_FPU_CW(savefpu, value) do { \
if (cpu_fxsr) \
(savefpu)->sv_xmm.sv_env.en_cw = (value); \
@ -149,9 +149,9 @@ void stop_emulating(void);
} while (0)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(thread) \
(thread->td_pcb->pcb_save.sv_87.sv_env.en_cw)
(thread->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define GET_FPU_SW(thread) \
(thread->td_pcb->pcb_save.sv_87.sv_env.en_sw)
(thread->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define SET_FPU_CW(savefpu, value) \
(savefpu)->sv_87.sv_env.en_cw = (value)
#endif /* CPU_ENABLE_SSE */
@ -502,7 +502,7 @@ npxexit(td)
savecrit = intr_disable();
if (curthread == PCPU_GET(fpcurthread))
npxsave(&PCPU_GET(curpcb)->pcb_save);
npxsave(PCPU_GET(curpcb)->pcb_save);
intr_restore(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@ -809,6 +809,8 @@ npxdna(void)
if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
fldcw(&pcb->pcb_initial_npxcw);
pcb->pcb_flags |= PCB_NPXINITDONE;
if (PCB_USER_FPU(pcb))
pcb->pcb_flags |= PCB_NPXUSERINITDONE;
} else {
/*
* The following fpurstor() may cause an IRQ13 when the
@ -824,7 +826,7 @@ npxdna(void)
* fnclex if it is the first FPU instruction after a context
* switch.
*/
fpurstor(&pcb->pcb_save);
fpurstor(pcb->pcb_save);
}
intr_restore(s);
@ -895,18 +897,18 @@ npxdrop()
* It returns the FPU ownership status.
*/
int
npxgetregs(td, addr)
struct thread *td;
union savefpu *addr;
npxgetregs(struct thread *td, union savefpu *addr)
{
struct pcb *pcb;
register_t s;
if (!npx_exists)
return (_MC_FPOWNED_NONE);
if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
bcopy(&npx_initialstate, addr, sizeof(npx_initialstate));
SET_FPU_CW(addr, td->td_pcb->pcb_initial_npxcw);
SET_FPU_CW(addr, pcb->pcb_initial_npxcw);
return (_MC_FPOWNED_NONE);
}
s = intr_disable();
@ -925,7 +927,43 @@ npxgetregs(td, addr)
return (_MC_FPOWNED_FPU);
} else {
intr_restore(s);
bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr));
bcopy(pcb->pcb_save, addr, sizeof(*addr));
return (_MC_FPOWNED_PCB);
}
}
/*
 * Copy the usermode FPU state of thread td into *addr.
 *
 * Returns _MC_FPOWNED_NONE when there is no npx or when
 * PCB_NPXUSERINITDONE is clear (in the latter case the initial state
 * image with the thread's initial control word is returned),
 * _MC_FPOWNED_FPU when the state was stored with fpusave() because td
 * is fpcurthread and the pcb is in user-FPU mode, and _MC_FPOWNED_PCB
 * when it was copied from pcb_user_save.
 */
int
npxgetuserregs(struct thread *td, union savefpu *addr)
{
	struct pcb *tdpcb;
	register_t saved_intr;
	int live;

	if (!npx_exists)
		return (_MC_FPOWNED_NONE);

	tdpcb = td->td_pcb;

	/* No user state yet: return the pristine image. */
	if ((tdpcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) {
		bcopy(&npx_initialstate, addr, sizeof(npx_initialstate));
		SET_FPU_CW(addr, tdpcb->pcb_initial_npxcw);
		return (_MC_FPOWNED_NONE);
	}

	saved_intr = intr_disable();
	live = (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(tdpcb));
	if (live) {
		fpusave(addr);
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			/*
			 * fnsave reinitializes the FPU and discards the
			 * context it held, so drop ownership to make the
			 * owner start from a clean state next time.
			 */
			npxdrop();
	}
	intr_restore(saved_intr);

	if (live)
		return (_MC_FPOWNED_FPU);

	bcopy(&tdpcb->pcb_user_save, addr, sizeof(*addr));
	return (_MC_FPOWNED_PCB);
}
@ -934,15 +972,15 @@ npxgetregs(td, addr)
* Set the state of the FPU.
*/
void
npxsetregs(td, addr)
struct thread *td;
union savefpu *addr;
npxsetregs(struct thread *td, union savefpu *addr)
{
struct pcb *pcb;
register_t s;
if (!npx_exists)
return;
pcb = td->td_pcb;
s = intr_disable();
if (td == PCPU_GET(fpcurthread)) {
#ifdef CPU_ENABLE_SSE
@ -953,9 +991,39 @@ npxsetregs(td, addr)
intr_restore(s);
} else {
intr_restore(s);
bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr));
bcopy(addr, pcb->pcb_save, sizeof(*addr));
}
if (PCB_USER_FPU(pcb))
pcb->pcb_flags |= PCB_NPXUSERINITDONE;
pcb->pcb_flags |= PCB_NPXINITDONE;
}
/*
 * Install *addr as the usermode FPU state of thread td.
 *
 * Does nothing when there is no npx.  When td is fpcurthread and the
 * pcb is in user-FPU mode the state is loaded with fpurstor();
 * otherwise it is copied into pcb_user_save.  PCB_NPXUSERINITDONE is
 * always set on td's pcb; PCB_NPXINITDONE is set only while the pcb is
 * in user-FPU mode, so the init state of a kernel FPU context is left
 * alone.
 */
void
npxsetuserregs(struct thread *td, union savefpu *addr)
{
	struct pcb *pcb;
	register_t s;

	if (!npx_exists)
		return;

	pcb = td->td_pcb;
	s = intr_disable();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			fnclex();	/* As in npxdrop(). */
		fpurstor(addr);
		intr_restore(s);
		pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
	} else {
		intr_restore(s);
		bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
		if (PCB_USER_FPU(pcb))
			pcb->pcb_flags |= PCB_NPXINITDONE;
		pcb->pcb_flags |= PCB_NPXUSERINITDONE;
	}
}
static void
@ -1124,3 +1192,74 @@ DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
#endif
#endif /* DEV_ISA */
/*
 * Begin a region of kernel FPU use for thread td, with ctx supplying
 * the save area.
 *
 * Records in ctx->flags whether PCB_NPXINITDONE was set, calls
 * npxexit(td), remembers the previous pcb_save in ctx->prev and makes
 * ctx->hwstate the current save area.  On return PCB_KERNNPX is set
 * and PCB_NPXINITDONE is clear.  The flags argument is not examined.
 * Always returns 0.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *tdpcb;

	tdpcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(tdpcb) ||
	    tdpcb->pcb_save == &tdpcb->pcb_user_save, ("mangled pcb_save"));

	/* Remember the init state so fpu_kern_leave() can reinstate it. */
	ctx->flags = (tdpcb->pcb_flags & PCB_NPXINITDONE) != 0 ?
	    FPU_KERN_CTX_NPXINITDONE : 0;

	npxexit(td);

	/* Chain ctx in front of the save area that was current so far. */
	ctx->prev = tdpcb->pcb_save;
	tdpcb->pcb_save = &ctx->hwstate;
	tdpcb->pcb_flags |= PCB_KERNNPX;
	tdpcb->pcb_flags &= ~PCB_NPXINITDONE;
	return (0);
}
/*
 * End a region of kernel FPU use that was started with ctx.
 *
 * Drops the npx if curthread is fpcurthread, makes ctx->prev the
 * current save area again and recomputes PCB_NPXINITDONE: from
 * PCB_NPXUSERINITDONE when the restored area is pcb_user_save (in
 * which case PCB_KERNNPX is cleared too), otherwise from the bit
 * recorded in ctx->flags.  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *tdpcb;
	register_t intr_state;
	int back_to_user, initdone;

	tdpcb = td->td_pcb;

	intr_state = intr_disable();
	if (curthread == PCPU_GET(fpcurthread))
		npxdrop();
	intr_restore(intr_state);

	/* Unchain ctx and work out which init state applies now. */
	tdpcb->pcb_save = ctx->prev;
	back_to_user = (tdpcb->pcb_save == &tdpcb->pcb_user_save);
	if (back_to_user)
		initdone = (tdpcb->pcb_flags & PCB_NPXUSERINITDONE) != 0;
	else
		initdone = (ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0;

	if (initdone)
		tdpcb->pcb_flags |= PCB_NPXINITDONE;
	else
		tdpcb->pcb_flags &= ~PCB_NPXINITDONE;

	if (back_to_user) {
		tdpcb->pcb_flags &= ~PCB_KERNNPX;
	} else {
		/* Still inside an outer kernel FPU region. */
		KASSERT(!PCB_USER_FPU(tdpcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}
/*
 * Mark the current kernel thread as a kernel FPU user by setting
 * PCB_KERNNPX in its pcb.
 *
 * Asserts that the caller is a kthread, that pcb_save still points at
 * pcb_user_save and that PCB_KERNNPX is not already set.  The flags
 * argument is not examined.  Always returns 0.
 */
int
fpu_kern_thread(u_int flags)
{
	struct pcb *curp = PCPU_GET(curpcb);

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(curp->pcb_save == &curp->pcb_user_save,
	    ("mangled pcb_save"));
	KASSERT(PCB_USER_FPU(curp), ("recursive call"));

	curp->pcb_flags |= PCB_KERNNPX;
	return (0);
}
/*
 * Return non-zero when curthread is a kthread whose pcb has
 * PCB_KERNNPX set, and 0 otherwise.  The flags argument is not
 * examined.
 */
int
is_fpu_kern_thread(u_int flags)
{

	return ((curthread->td_pflags & TDP_KTHREAD) != 0 &&
	    (PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0);
}

View File

@ -224,7 +224,7 @@ linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs)
PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0)
return (EIO);
bcopy(&td->td_pcb->pcb_save.sv_xmm, fpxregs, sizeof(*fpxregs));
bcopy(&td->td_pcb->pcb_user_save.sv_xmm, fpxregs, sizeof(*fpxregs));
return (0);
}
@ -235,7 +235,7 @@ linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs)
PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
if (cpu_fxsr == 0 || (td->td_proc->p_flag & P_INMEM) == 0)
return (EIO);
bcopy(fpxregs, &td->td_pcb->pcb_save.sv_xmm, sizeof(*fpxregs));
bcopy(fpxregs, &td->td_pcb->pcb_user_save.sv_xmm, sizeof(*fpxregs));
return (0);
}
#endif

View File

@ -2513,12 +2513,12 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs)
{
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm,
fill_fpregs_xmm(&td->td_pcb->pcb_save->sv_xmm,
(struct save87 *)fpregs);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs);
bcopy(&td->td_pcb->pcb_save->sv_87, fpregs, sizeof *fpregs);
return (0);
}
@ -2528,11 +2528,11 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
set_fpregs_xmm((struct save87 *)fpregs,
&td->td_pcb->pcb_save.sv_xmm);
&td->td_pcb->pcb_save->sv_xmm);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs);
bcopy(fpregs, &td->td_pcb->pcb_save->sv_87, sizeof *fpregs);
return (0);
}