Activate SSE/SIMD. This is the extra context-switching support that
we are required to provide if we let user processes use the extra 128-bit registers, etc. This is the base part of the diff I got from http://www.issei.org/issei/FreeBSD/sse.html. I believe it is by Mr. SUZUKI Issei <issei@issei.org>; SMP support is apparently by Takekazu KATO <kato@chino.it.okayama-u.ac.jp>; test code is by NAKAMURA Kazushi <kaz@kobe1995.net> (see http://kobe1995.net/~kaz/FreeBSD/SSE.en.html). I have fixed a couple of style(9) deviations, and I have some follow-up commits to fix a couple of non-style things.
This commit is contained in:
parent
b5164c6585
commit
e00129231d
@ -153,6 +153,9 @@ IDTVEC(fpu)
|
||||
IDTVEC(align)
|
||||
TRAP(T_ALIGNFLT)
|
||||
|
||||
IDTVEC(xmm)
|
||||
pushl $0; TRAP(T_XMMFLT)
|
||||
|
||||
/*
|
||||
* alltraps entry point. Interrupts are enabled if this was a trap
|
||||
* gate (TGT), else disabled if this was an interrupt gate (IGT).
|
||||
|
@ -153,6 +153,9 @@ IDTVEC(fpu)
|
||||
IDTVEC(align)
|
||||
TRAP(T_ALIGNFLT)
|
||||
|
||||
IDTVEC(xmm)
|
||||
pushl $0; TRAP(T_XMMFLT)
|
||||
|
||||
/*
|
||||
* alltraps entry point. Interrupts are enabled if this was a trap
|
||||
* gate (TGT), else disabled if this was an interrupt gate (IGT).
|
||||
|
@ -35,6 +35,7 @@
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_cpu.h"
|
||||
#include "opt_debug_npx.h"
|
||||
#include "opt_math_emulate.h"
|
||||
|
||||
@ -99,6 +100,8 @@
|
||||
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
|
||||
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
|
||||
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
|
||||
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
||||
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
|
||||
: : "n" (CR0_TS) : "ax")
|
||||
#define stop_emulating() __asm("clts")
|
||||
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
|
||||
void fnstsw __P((caddr_t addr));
|
||||
void fp_divide_by_0 __P((void));
|
||||
void frstor __P((caddr_t addr));
|
||||
void fxsave __P((caddr_t addr));
|
||||
void fxrstor __P((caddr_t addr));
|
||||
void start_emulating __P((void));
|
||||
void stop_emulating __P((void));
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
/*
 * Accessors for the FPU state saved in a process's PCB (pcb_save, a
 * union savefpu).  With CPU_ENABLE_SSE the layout is chosen at run time:
 * the sv_xmm member is used when cpu_fxsr is non-zero, the sv_87 member
 * otherwise.  Without CPU_ENABLE_SSE only sv_87 is ever used.
 *
 *   GET_FPU_CW(proc)        saved control word of `proc'
 *   GET_FPU_SW(proc)        saved status word of `proc'
 *   MASK_FPU_SW(proc, mask) AND the saved status word with `mask' IN PLACE
 *                           and yield the new value
 *   GET_FPU_EXSW_PTR(pcb)   pointer to the "status word for last
 *                           exception" slot of `pcb'
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(proc) \
    (cpu_fxsr ? \
        (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
        (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
    (cpu_fxsr ? \
        (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
        (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
/*
 * This has to be a read-modify-write (&=).  A plain `&' only computes the
 * masked value; used as a statement the result is thrown away and the
 * saved exception bits are never cleared.
 */
#define MASK_FPU_SW(proc, mask) \
    (cpu_fxsr ? \
        ((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
        ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
#define GET_FPU_EXSW_PTR(pcb) \
    (cpu_fxsr ? \
        &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
        &(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(proc) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
/* Read-modify-write, see the note on the CPU_ENABLE_SSE variant. */
#define MASK_FPU_SW(proc, mask) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define GET_FPU_EXSW_PTR(pcb) \
    (&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
|
||||
|
||||
typedef u_char bool_t;
|
||||
|
||||
static int npx_attach __P((device_t dev));
|
||||
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
|
||||
#endif
|
||||
static int npx_probe __P((device_t dev));
|
||||
static int npx_probe1 __P((device_t dev));
|
||||
static void fpusave __P((union savefpu *, u_char));
|
||||
static void fpurstor __P((union savefpu *, u_char));
|
||||
#ifdef I586_CPU_XXX
|
||||
static long timezero __P((const char *funcname,
|
||||
void (*func)(void *buf, size_t len)));
|
||||
@ -529,7 +564,7 @@ void
|
||||
npxinit(control)
|
||||
u_short control;
|
||||
{
|
||||
struct save87 dummy;
|
||||
union savefpu dummy;
|
||||
critical_t savecrit;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -544,7 +579,7 @@ npxinit(control)
|
||||
stop_emulating();
|
||||
fldcw(&control);
|
||||
if (PCPU_GET(curpcb) != NULL)
|
||||
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
start_emulating();
|
||||
critical_exit(savecrit);
|
||||
}
|
||||
@ -560,7 +595,7 @@ npxexit(p)
|
||||
|
||||
savecrit = critical_enter();
|
||||
if (p == PCPU_GET(npxproc))
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_save);
|
||||
critical_exit(savecrit);
|
||||
#ifdef NPX_DEBUG
|
||||
if (npx_exists) {
|
||||
@ -773,6 +808,7 @@ npxtrap()
|
||||
{
|
||||
critical_t savecrit;
|
||||
u_short control, status;
|
||||
u_long *exstat;
|
||||
|
||||
if (!npx_exists) {
|
||||
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
|
||||
@ -787,16 +823,17 @@ npxtrap()
|
||||
* wherever they are.
|
||||
*/
|
||||
if (PCPU_GET(npxproc) != curproc) {
|
||||
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
|
||||
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
|
||||
control = GET_FPU_CW(curproc);
|
||||
status = GET_FPU_SW(curproc);
|
||||
} else {
|
||||
fnstcw(&control);
|
||||
fnstsw(&status);
|
||||
}
|
||||
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
|
||||
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
|
||||
*exstat = status;
|
||||
if (PCPU_GET(npxproc) != curproc)
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
|
||||
MASK_FPU_SW(curproc, ~0x80bf);
|
||||
else
|
||||
fnclex();
|
||||
critical_exit(savecrit);
|
||||
@ -813,6 +850,7 @@ npxtrap()
|
||||
int
|
||||
npxdna()
|
||||
{
|
||||
u_long *exstat;
|
||||
critical_t s;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -828,7 +866,9 @@ npxdna()
|
||||
* Record new context early in case frstor causes an IRQ13.
|
||||
*/
|
||||
PCPU_SET(npxproc, CURPROC);
|
||||
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
|
||||
|
||||
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
|
||||
*exstat = 0;
|
||||
/*
|
||||
* The following frstor may cause an IRQ13 when the state being
|
||||
* restored has a pending error. The error will appear to have been
|
||||
@ -841,7 +881,7 @@ npxdna()
|
||||
* fnsave are broken, so our treatment breaks fnclex if it is the
|
||||
* first FPU instruction after a context switch.
|
||||
*/
|
||||
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
critical_exit(s);
|
||||
|
||||
return (1);
|
||||
@ -872,15 +912,46 @@ npxdna()
|
||||
*/
|
||||
void
|
||||
npxsave(addr)
|
||||
struct save87 *addr;
|
||||
union savefpu *addr;
|
||||
{
|
||||
|
||||
stop_emulating();
|
||||
fnsave(addr);
|
||||
fpusave(addr, curproc->p_oncpu);
|
||||
|
||||
start_emulating();
|
||||
PCPU_SET(npxproc, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
fpusave(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
fnsave(addr);
|
||||
else {
|
||||
fxsave(&svxmm[oncpu]);
|
||||
bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fpurstor(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
frstor(addr);
|
||||
else {
|
||||
bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm));
|
||||
fxrstor(&svxmm[oncpu]);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef I586_CPU_XXX
|
||||
static long
|
||||
timezero(funcname, func)
|
||||
|
@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
|
||||
|
||||
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
|
||||
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
|
||||
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
|
||||
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
|
||||
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
|
||||
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
|
||||
ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
|
||||
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
|
||||
|
||||
#ifdef SMP
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <machine/cputypes.h>
|
||||
#include <machine/md_var.h>
|
||||
@ -61,8 +62,14 @@ static void init_6x86(void);
|
||||
static void init_6x86MX(void);
|
||||
static void init_ppro(void);
|
||||
static void init_mendocino(void);
|
||||
void enable_sse();
|
||||
#endif
|
||||
|
||||
int hw_instruction_sse = 0;
|
||||
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
|
||||
&hw_instruction_sse, 0,
|
||||
"SIMD/MMX2 instructions available in CPU");
|
||||
|
||||
#ifdef I486_CPU
|
||||
/*
|
||||
* IBM Blue Lightning
|
||||
@ -501,6 +508,20 @@ init_mendocino(void)
|
||||
#endif /* CPU_PPRO2CELERON */
|
||||
}
|
||||
|
||||
/*
 * Enable SSE on the current CPU.
 *
 * Only does anything when the kernel is built with CPU_ENABLE_SSE.  If
 * cpu_feature reports both CPUID_XMM and CPUID_FXSR, the CR4_FXSR and
 * CR4_XMM bits are set in CR4 (Control register 4) and the cpu_fxsr and
 * hw_instruction_sse flags are set to 1.
 */
void
enable_sse(void)
{
#ifdef CPU_ENABLE_SSE
    const u_int wanted = CPUID_XMM | CPUID_FXSR;

    if ((cpu_feature & wanted) != wanted)
        return;

    load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
    hw_instruction_sse = 1;
    cpu_fxsr = 1;
#endif
}
|
||||
|
||||
#endif /* I686_CPU */
|
||||
|
||||
void
|
||||
@ -544,6 +565,7 @@ initializecpu(void)
|
||||
init_mendocino();
|
||||
break;
|
||||
}
|
||||
enable_sse();
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
@ -113,12 +113,13 @@ HIDENAME(tmpstk):
|
||||
.globl boothowto,bootdev
|
||||
|
||||
.globl cpu,cpu_vendor,cpu_id,bootinfo
|
||||
.globl cpu_high, cpu_feature
|
||||
.globl cpu_high, cpu_feature, cpu_fxsr
|
||||
|
||||
cpu: .long 0 /* are we 386, 386sx, or 486 */
|
||||
cpu_id: .long 0 /* stepping ID */
|
||||
cpu_high: .long 0 /* highest arg to CPUID */
|
||||
cpu_feature: .long 0 /* features */
|
||||
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
|
||||
cpu_vendor: .space 20 /* CPU origin code */
|
||||
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
|
||||
|
||||
|
@ -113,12 +113,13 @@ HIDENAME(tmpstk):
|
||||
.globl boothowto,bootdev
|
||||
|
||||
.globl cpu,cpu_vendor,cpu_id,bootinfo
|
||||
.globl cpu_high, cpu_feature
|
||||
.globl cpu_high, cpu_feature, cpu_fxsr
|
||||
|
||||
cpu: .long 0 /* are we 386, 386sx, or 486 */
|
||||
cpu_id: .long 0 /* stepping ID */
|
||||
cpu_high: .long 0 /* highest arg to CPUID */
|
||||
cpu_feature: .long 0 /* features */
|
||||
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
|
||||
cpu_vendor: .space 20 /* CPU origin code */
|
||||
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
|
||||
|
||||
|
@ -127,6 +127,10 @@ extern void initializecpu(void);
|
||||
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
|
||||
|
||||
static void cpu_startup __P((void *));
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
|
||||
static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
|
||||
|
||||
int _udatasel, _ucodesel;
|
||||
@ -1361,7 +1365,7 @@ extern inthand_t
|
||||
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
|
||||
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
|
||||
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
|
||||
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
|
||||
IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
|
||||
|
||||
void
|
||||
sdtossd(sd, ssd)
|
||||
@ -1900,6 +1904,7 @@ init386(first)
|
||||
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(0x80, &IDTVEC(int0x80_syscall),
|
||||
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
|
||||
@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
|
||||
*(int*)((char *)p->p_addr + off) = data;
|
||||
return (0);
|
||||
}
|
||||
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
|
||||
if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
|
||||
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
|
||||
if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
|
||||
*(int*)((char *)p->p_addr + off) = data;
|
||||
return (0);
|
||||
}
|
||||
@ -2161,12 +2166,73 @@ set_regs(p, regs)
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
static void
|
||||
fill_fpregs_xmm(sv_xmm, sv_87)
|
||||
struct savexmm *sv_xmm;
|
||||
struct save87 *sv_87;
|
||||
{
|
||||
register struct env87 *penv_87 = &sv_87->sv_env;
|
||||
register struct envxmm *penv_xmm = &sv_xmm->sv_env;
|
||||
int i;
|
||||
|
||||
/* FPU control/status */
|
||||
penv_87->en_cw = penv_xmm->en_cw;
|
||||
penv_87->en_sw = penv_xmm->en_sw;
|
||||
penv_87->en_tw = penv_xmm->en_tw;
|
||||
penv_87->en_fip = penv_xmm->en_fip;
|
||||
penv_87->en_fcs = penv_xmm->en_fcs;
|
||||
penv_87->en_opcode = penv_xmm->en_opcode;
|
||||
penv_87->en_foo = penv_xmm->en_foo;
|
||||
penv_87->en_fos = penv_xmm->en_fos;
|
||||
|
||||
/* FPU registers */
|
||||
for (i = 0; i < 8; ++i)
|
||||
sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
|
||||
|
||||
sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
|
||||
}
|
||||
|
||||
static void
|
||||
set_fpregs_xmm(sv_87, sv_xmm)
|
||||
struct save87 *sv_87;
|
||||
struct savexmm *sv_xmm;
|
||||
{
|
||||
register struct env87 *penv_87 = &sv_87->sv_env;
|
||||
register struct envxmm *penv_xmm = &sv_xmm->sv_env;
|
||||
int i;
|
||||
|
||||
/* FPU control/status */
|
||||
penv_xmm->en_cw = penv_87->en_cw;
|
||||
penv_xmm->en_sw = penv_87->en_sw;
|
||||
penv_xmm->en_tw = penv_87->en_tw;
|
||||
penv_xmm->en_fip = penv_87->en_fip;
|
||||
penv_xmm->en_fcs = penv_87->en_fcs;
|
||||
penv_xmm->en_opcode = penv_87->en_opcode;
|
||||
penv_xmm->en_foo = penv_87->en_foo;
|
||||
penv_xmm->en_fos = penv_87->en_fos;
|
||||
|
||||
/* FPU registers */
|
||||
for (i = 0; i < 8; ++i)
|
||||
sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
|
||||
|
||||
sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
|
||||
int
|
||||
fill_fpregs(p, fpregs)
|
||||
struct proc *p;
|
||||
struct fpreg *fpregs;
|
||||
{
|
||||
bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
if (cpu_fxsr) {
|
||||
fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
|
||||
(struct save87 *)fpregs);
|
||||
return (0);
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
|
||||
struct proc *p;
|
||||
struct fpreg *fpregs;
|
||||
{
|
||||
bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
if (cpu_fxsr) {
|
||||
set_fpregs_xmm((struct save87 *)fpregs,
|
||||
&p->p_addr->u_pcb.pcb_save.sv_xmm);
|
||||
return (0);
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -976,7 +976,7 @@ ENTRY(i586_copyin)
|
||||
ENTRY(fastmove)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $PCB_SAVEFPU_SIZE+3*4,%esp
|
||||
subl $PCB_SAVE87_SIZE+3*4,%esp
|
||||
|
||||
movl 8(%ebp),%ecx
|
||||
cmpl $63,%ecx
|
||||
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
|
||||
movl PCPU(CURPCB),%esi
|
||||
addl $PCB_SAVEFPU,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1102,7 +1102,7 @@ fastmove_loop:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1147,7 +1147,7 @@ fastmove_fault:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
|
||||
|
@ -976,7 +976,7 @@ ENTRY(i586_copyin)
|
||||
ENTRY(fastmove)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $PCB_SAVEFPU_SIZE+3*4,%esp
|
||||
subl $PCB_SAVE87_SIZE+3*4,%esp
|
||||
|
||||
movl 8(%ebp),%ecx
|
||||
cmpl $63,%ecx
|
||||
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
|
||||
movl PCPU(CURPCB),%esi
|
||||
addl $PCB_SAVEFPU,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1102,7 +1102,7 @@ fastmove_loop:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1147,7 +1147,7 @@ fastmove_fault:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
|
||||
|
@ -386,6 +386,11 @@ restart:
|
||||
ucode = T_FPOPFLT;
|
||||
i = SIGILL;
|
||||
break;
|
||||
|
||||
case T_XMMFLT: /* SIMD floating-point exception */
|
||||
ucode = 0; /* XXX */
|
||||
i = SIGFPE;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* kernel trap */
|
||||
|
@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
|
||||
p1->p_addr->u_pcb.pcb_gs = rgs();
|
||||
savecrit = critical_enter();
|
||||
if (PCPU_GET(npxproc) == p1)
|
||||
npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
|
||||
npxsave(&p1->p_addr->u_pcb.pcb_save);
|
||||
critical_exit(savecrit);
|
||||
#endif
|
||||
|
||||
|
@ -85,6 +85,42 @@ struct save87 {
|
||||
u_char sv_pad[64]; /* padding; used by emulators */
|
||||
};
|
||||
|
||||
/*
 * Control/status environment of the extended FPU save area; this is the
 * first member (sv_env) of struct savexmm.
 */
struct envxmm {
|
||||
u_int16_t en_cw; /* control word (16bits) */
|
||||
u_int16_t en_sw; /* status word (16bits) */
|
||||
u_int16_t en_tw; /* tag word (16bits) */
|
||||
u_int16_t en_opcode; /* opcode last executed (11 bits) */
|
||||
u_int32_t en_fip; /* floating point instruction pointer */
|
||||
u_int16_t en_fcs; /* floating code segment selector */
|
||||
u_int16_t en_pad0; /* padding */
|
||||
u_int32_t en_foo; /* floating operand offset */
|
||||
u_int16_t en_fos; /* floating operand segment selector */
|
||||
u_int16_t en_pad1; /* padding */
|
||||
u_int32_t en_mxcsr; /* SSE control/status register */
|
||||
u_int32_t en_pad2; /* padding */
|
||||
};
|
||||
|
||||
/* Contents of each SSE extended accumulator */
|
||||
struct xmmacc {
|
||||
u_char xmm_bytes[16];
|
||||
};
|
||||
|
||||
/*
 * Extended FPU save area: environment, eight FPU accumulators (each
 * followed by 6 bytes of padding), eight SSE accumulators and the status
 * word of the last exception.  The type is declared 16-byte aligned.
 */
struct savexmm {
|
||||
struct envxmm sv_env; /* control/status environment */
|
||||
struct {
|
||||
struct fpacc87 fp_acc; /* FPU accumulator */
|
||||
u_char fp_pad[6]; /* padding */
|
||||
} sv_fp[8];
|
||||
struct xmmacc sv_xmm[8]; /* SSE extended accumulators */
|
||||
u_long sv_ex_sw; /* status word for last exception */
|
||||
u_char sv_pad[220]; /* presumably pads out the save image -- TODO confirm */
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
/*
 * Saved FPU state in one of two layouts sharing the same storage:
 * sv_87 (struct save87) or sv_xmm (struct savexmm).
 */
union savefpu {
|
||||
struct save87 sv_87; /* save87 layout */
|
||||
struct savexmm sv_xmm; /* savexmm layout (includes SSE state) */
|
||||
};
|
||||
|
||||
/*
|
||||
* The hardware default control word for i387's and later coprocessors is
|
||||
* 0x37F, giving:
|
||||
@ -108,7 +144,7 @@ struct save87 {
|
||||
int npxdna __P((void));
|
||||
void npxexit __P((struct proc *p));
|
||||
void npxinit __P((int control));
|
||||
void npxsave __P((struct save87 *addr));
|
||||
void npxsave __P((union savefpu *addr));
|
||||
int npxtrap __P((void));
|
||||
#endif
|
||||
|
||||
|
@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
|
||||
extern u_int cpu_feature;
|
||||
extern u_int cpu_high;
|
||||
extern u_int cpu_id;
|
||||
extern u_int cpu_fxsr;
|
||||
extern char cpu_vendor[];
|
||||
extern u_int cyrix_did;
|
||||
extern char kstack[];
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -85,6 +85,42 @@ struct save87 {
|
||||
u_char sv_pad[64]; /* padding; used by emulators */
|
||||
};
|
||||
|
||||
/*
 * Control/status environment of the extended FPU save area; this is the
 * first member (sv_env) of struct savexmm.
 */
struct envxmm {
|
||||
u_int16_t en_cw; /* control word (16bits) */
|
||||
u_int16_t en_sw; /* status word (16bits) */
|
||||
u_int16_t en_tw; /* tag word (16bits) */
|
||||
u_int16_t en_opcode; /* opcode last executed (11 bits) */
|
||||
u_int32_t en_fip; /* floating point instruction pointer */
|
||||
u_int16_t en_fcs; /* floating code segment selector */
|
||||
u_int16_t en_pad0; /* padding */
|
||||
u_int32_t en_foo; /* floating operand offset */
|
||||
u_int16_t en_fos; /* floating operand segment selector */
|
||||
u_int16_t en_pad1; /* padding */
|
||||
u_int32_t en_mxcsr; /* SSE control/status register */
|
||||
u_int32_t en_pad2; /* padding */
|
||||
};
|
||||
|
||||
/* Contents of each SSE extended accumulator */
|
||||
struct xmmacc {
|
||||
u_char xmm_bytes[16];
|
||||
};
|
||||
|
||||
/*
 * Extended FPU save area: environment, eight FPU accumulators (each
 * followed by 6 bytes of padding), eight SSE accumulators and the status
 * word of the last exception.  The type is declared 16-byte aligned.
 */
struct savexmm {
|
||||
struct envxmm sv_env; /* control/status environment */
|
||||
struct {
|
||||
struct fpacc87 fp_acc; /* FPU accumulator */
|
||||
u_char fp_pad[6]; /* padding */
|
||||
} sv_fp[8];
|
||||
struct xmmacc sv_xmm[8]; /* SSE extended accumulators */
|
||||
u_long sv_ex_sw; /* status word for last exception */
|
||||
u_char sv_pad[220]; /* presumably pads out the save image -- TODO confirm */
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
/*
 * Saved FPU state in one of two layouts sharing the same storage:
 * sv_87 (struct save87) or sv_xmm (struct savexmm).
 */
union savefpu {
|
||||
struct save87 sv_87; /* save87 layout */
|
||||
struct savexmm sv_xmm; /* savexmm layout (includes SSE state) */
|
||||
};
|
||||
|
||||
/*
|
||||
* The hardware default control word for i387's and later coprocessors is
|
||||
* 0x37F, giving:
|
||||
@ -108,7 +144,7 @@ struct save87 {
|
||||
int npxdna __P((void));
|
||||
void npxexit __P((struct proc *p));
|
||||
void npxinit __P((int control));
|
||||
void npxsave __P((struct save87 *addr));
|
||||
void npxsave __P((union savefpu *addr));
|
||||
int npxtrap __P((void));
|
||||
#endif
|
||||
|
||||
|
@ -62,7 +62,8 @@ struct pcb {
|
||||
int pcb_dr7;
|
||||
|
||||
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
|
||||
struct save87 pcb_savefpu; /* floating point state for 287/387 */
|
||||
union savefpu pcb_save;
|
||||
#define pcb_savefpu pcb_save.sv_87
|
||||
u_char pcb_flags;
|
||||
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
|
||||
#define PCB_DBREGS 0x02 /* process using debug registers */
|
||||
|
@ -93,6 +93,8 @@
|
||||
#define CPUID_PGE 0x2000
|
||||
#define CPUID_MCA 0x4000
|
||||
#define CPUID_CMOV 0x8000
|
||||
#define CPUID_FXSR 0x01000000
|
||||
#define CPUID_XMM 0x02000000
|
||||
|
||||
/*
|
||||
* Model-specific registers for the i386 family
|
||||
|
@ -64,7 +64,8 @@
|
||||
#define T_SEGNPFLT 26 /* segment not present fault */
|
||||
#define T_STKFLT 27 /* stack fault */
|
||||
#define T_MCHK 28 /* machine check trap */
|
||||
#define T_RESERVED 29 /* reserved (unknown) */
|
||||
#define T_XMMFLT 29 /* SIMD floating-point exception */
|
||||
#define T_RESERVED 30 /* reserved (unknown) */
|
||||
|
||||
/* XXX most of the following codes aren't used, but could be. */
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_cpu.h"
|
||||
#include "opt_debug_npx.h"
|
||||
#include "opt_math_emulate.h"
|
||||
|
||||
@ -99,6 +100,8 @@
|
||||
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
|
||||
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
|
||||
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
|
||||
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
||||
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
|
||||
: : "n" (CR0_TS) : "ax")
|
||||
#define stop_emulating() __asm("clts")
|
||||
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
|
||||
void fnstsw __P((caddr_t addr));
|
||||
void fp_divide_by_0 __P((void));
|
||||
void frstor __P((caddr_t addr));
|
||||
void fxsave __P((caddr_t addr));
|
||||
void fxrstor __P((caddr_t addr));
|
||||
void start_emulating __P((void));
|
||||
void stop_emulating __P((void));
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
/*
 * Accessors for the FPU state saved in a process's PCB (pcb_save, a
 * union savefpu).  With CPU_ENABLE_SSE the layout is chosen at run time:
 * the sv_xmm member is used when cpu_fxsr is non-zero, the sv_87 member
 * otherwise.  Without CPU_ENABLE_SSE only sv_87 is ever used.
 *
 *   GET_FPU_CW(proc)        saved control word of `proc'
 *   GET_FPU_SW(proc)        saved status word of `proc'
 *   MASK_FPU_SW(proc, mask) AND the saved status word with `mask' IN PLACE
 *                           and yield the new value
 *   GET_FPU_EXSW_PTR(pcb)   pointer to the "status word for last
 *                           exception" slot of `pcb'
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(proc) \
    (cpu_fxsr ? \
        (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
        (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
    (cpu_fxsr ? \
        (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
        (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
/*
 * This has to be a read-modify-write (&=).  A plain `&' only computes the
 * masked value; used as a statement the result is thrown away and the
 * saved exception bits are never cleared.
 */
#define MASK_FPU_SW(proc, mask) \
    (cpu_fxsr ? \
        ((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
        ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
#define GET_FPU_EXSW_PTR(pcb) \
    (cpu_fxsr ? \
        &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
        &(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(proc) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
/* Read-modify-write, see the note on the CPU_ENABLE_SSE variant. */
#define MASK_FPU_SW(proc, mask) \
    ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define GET_FPU_EXSW_PTR(pcb) \
    (&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
|
||||
|
||||
typedef u_char bool_t;
|
||||
|
||||
static int npx_attach __P((device_t dev));
|
||||
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
|
||||
#endif
|
||||
static int npx_probe __P((device_t dev));
|
||||
static int npx_probe1 __P((device_t dev));
|
||||
static void fpusave __P((union savefpu *, u_char));
|
||||
static void fpurstor __P((union savefpu *, u_char));
|
||||
#ifdef I586_CPU_XXX
|
||||
static long timezero __P((const char *funcname,
|
||||
void (*func)(void *buf, size_t len)));
|
||||
@ -529,7 +564,7 @@ void
|
||||
npxinit(control)
|
||||
u_short control;
|
||||
{
|
||||
struct save87 dummy;
|
||||
union savefpu dummy;
|
||||
critical_t savecrit;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -544,7 +579,7 @@ npxinit(control)
|
||||
stop_emulating();
|
||||
fldcw(&control);
|
||||
if (PCPU_GET(curpcb) != NULL)
|
||||
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
start_emulating();
|
||||
critical_exit(savecrit);
|
||||
}
|
||||
@ -560,7 +595,7 @@ npxexit(p)
|
||||
|
||||
savecrit = critical_enter();
|
||||
if (p == PCPU_GET(npxproc))
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_save);
|
||||
critical_exit(savecrit);
|
||||
#ifdef NPX_DEBUG
|
||||
if (npx_exists) {
|
||||
@ -773,6 +808,7 @@ npxtrap()
|
||||
{
|
||||
critical_t savecrit;
|
||||
u_short control, status;
|
||||
u_long *exstat;
|
||||
|
||||
if (!npx_exists) {
|
||||
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
|
||||
@ -787,16 +823,17 @@ npxtrap()
|
||||
* wherever they are.
|
||||
*/
|
||||
if (PCPU_GET(npxproc) != curproc) {
|
||||
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
|
||||
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
|
||||
control = GET_FPU_CW(curproc);
|
||||
status = GET_FPU_SW(curproc);
|
||||
} else {
|
||||
fnstcw(&control);
|
||||
fnstsw(&status);
|
||||
}
|
||||
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
|
||||
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
|
||||
*exstat = status;
|
||||
if (PCPU_GET(npxproc) != curproc)
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
|
||||
MASK_FPU_SW(curproc, ~0x80bf);
|
||||
else
|
||||
fnclex();
|
||||
critical_exit(savecrit);
|
||||
@ -813,6 +850,7 @@ npxtrap()
|
||||
int
|
||||
npxdna()
|
||||
{
|
||||
u_long *exstat;
|
||||
critical_t s;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -828,7 +866,9 @@ npxdna()
|
||||
* Record new context early in case frstor causes an IRQ13.
|
||||
*/
|
||||
PCPU_SET(npxproc, CURPROC);
|
||||
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
|
||||
|
||||
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
|
||||
*exstat = 0;
|
||||
/*
|
||||
* The following frstor may cause an IRQ13 when the state being
|
||||
* restored has a pending error. The error will appear to have been
|
||||
@ -841,7 +881,7 @@ npxdna()
|
||||
* fnsave are broken, so our treatment breaks fnclex if it is the
|
||||
* first FPU instruction after a context switch.
|
||||
*/
|
||||
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
critical_exit(s);
|
||||
|
||||
return (1);
|
||||
@ -872,15 +912,46 @@ npxdna()
|
||||
*/
|
||||
void
|
||||
npxsave(addr)
|
||||
struct save87 *addr;
|
||||
union savefpu *addr;
|
||||
{
|
||||
|
||||
stop_emulating();
|
||||
fnsave(addr);
|
||||
fpusave(addr, curproc->p_oncpu);
|
||||
|
||||
start_emulating();
|
||||
PCPU_SET(npxproc, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
fpusave(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
fnsave(addr);
|
||||
else {
|
||||
fxsave(&svxmm[oncpu]);
|
||||
bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fpurstor(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
frstor(addr);
|
||||
else {
|
||||
bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm));
|
||||
fxrstor(&svxmm[oncpu]);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef I586_CPU_XXX
|
||||
static long
|
||||
timezero(funcname, func)
|
||||
|
@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
|
||||
# reorder). This option should not be used if you use memory mapped
|
||||
# I/O device(s).
|
||||
#
|
||||
# CPU_ENABLE_SSE enables SSE/MMX2 instructions support.
|
||||
#
|
||||
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
|
||||
#
|
||||
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
|
||||
@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
|
||||
options CPU_BTB_EN
|
||||
options CPU_DIRECT_MAPPED_CACHE
|
||||
options CPU_DISABLE_5X86_LSSER
|
||||
options CPU_ENABLE_SSE
|
||||
options CPU_FASTER_5X86_FPU
|
||||
options CPU_I486_ON_386
|
||||
options CPU_IORT
|
||||
|
@ -59,6 +59,7 @@ CPU_WT_ALLOC opt_cpu.h
|
||||
CYRIX_CACHE_WORKS opt_cpu.h
|
||||
CYRIX_CACHE_REALLY_WORKS opt_cpu.h
|
||||
NO_MEMORY_HOLE opt_cpu.h
|
||||
CPU_ENABLE_SSE opt_cpu.h
|
||||
|
||||
# The CPU type affects the endian conversion functions all over the kernel.
|
||||
I386_CPU opt_global.h
|
||||
|
@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
|
||||
# reorder). This option should not be used if you use memory mapped
|
||||
# I/O device(s).
|
||||
#
|
||||
# CPU_ENABLE_SSE enables SSE/MMX2 instruction support.
|
||||
#
|
||||
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
|
||||
#
|
||||
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
|
||||
@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
|
||||
options CPU_BTB_EN
|
||||
options CPU_DIRECT_MAPPED_CACHE
|
||||
options CPU_DISABLE_5X86_LSSER
|
||||
options CPU_ENABLE_SSE
|
||||
options CPU_FASTER_5X86_FPU
|
||||
options CPU_I486_ON_386
|
||||
options CPU_IORT
|
||||
|
@ -153,6 +153,9 @@ IDTVEC(fpu)
|
||||
IDTVEC(align)
|
||||
TRAP(T_ALIGNFLT)
|
||||
|
||||
IDTVEC(xmm)
|
||||
pushl $0; TRAP(T_XMMFLT)
|
||||
|
||||
/*
|
||||
* alltraps entry point. Interrupts are enabled if this was a trap
|
||||
* gate (TGT), else disabled if this was an interrupt gate (IGT).
|
||||
|
@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
|
||||
|
||||
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
|
||||
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
|
||||
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
|
||||
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
|
||||
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
|
||||
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
|
||||
ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
|
||||
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
|
||||
|
||||
#ifdef SMP
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <machine/cputypes.h>
|
||||
#include <machine/md_var.h>
|
||||
@ -61,8 +62,14 @@ static void init_6x86(void);
|
||||
static void init_6x86MX(void);
|
||||
static void init_ppro(void);
|
||||
static void init_mendocino(void);
|
||||
void enable_sse();
|
||||
#endif
|
||||
|
||||
int hw_instruction_sse = 0;
|
||||
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
|
||||
&hw_instruction_sse, 0,
|
||||
"SIMD/MMX2 instructions available in CPU");
|
||||
|
||||
#ifdef I486_CPU
|
||||
/*
|
||||
* IBM Blue Lightning
|
||||
@ -501,6 +508,20 @@ init_mendocino(void)
|
||||
#endif /* CPU_PPRO2CELERON */
|
||||
}
|
||||
|
||||
/*
 * Enable SSE instructions on the calling CPU.
 *
 * Does nothing unless the kernel was built with CPU_ENABLE_SSE and
 * cpu_feature reports both CPUID_XMM and CPUID_FXSR.  In that case the
 * CR4_FXSR and CR4_XMM bits are set in CR4 (Control register 4), and
 * hw_instruction_sse and cpu_fxsr are set to 1.
 */
void
enable_sse(void)
{
#if defined(CPU_ENABLE_SSE)
	const u_int need = CPUID_XMM | CPUID_FXSR;

	/* Both feature bits must be present. */
	if ((cpu_feature & need) != need)
		return;

	load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
	hw_instruction_sse = 1;
	cpu_fxsr = 1;
#endif
}
|
||||
|
||||
#endif /* I686_CPU */
|
||||
|
||||
void
|
||||
@ -544,6 +565,7 @@ initializecpu(void)
|
||||
init_mendocino();
|
||||
break;
|
||||
}
|
||||
enable_sse();
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
@ -113,12 +113,13 @@ HIDENAME(tmpstk):
|
||||
.globl boothowto,bootdev
|
||||
|
||||
.globl cpu,cpu_vendor,cpu_id,bootinfo
|
||||
.globl cpu_high, cpu_feature
|
||||
.globl cpu_high, cpu_feature, cpu_fxsr
|
||||
|
||||
cpu: .long 0 /* are we 386, 386sx, or 486 */
|
||||
cpu_id: .long 0 /* stepping ID */
|
||||
cpu_high: .long 0 /* highest arg to CPUID */
|
||||
cpu_feature: .long 0 /* features */
|
||||
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
|
||||
cpu_vendor: .space 20 /* CPU origin code */
|
||||
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
|
||||
|
||||
|
@ -127,6 +127,10 @@ extern void initializecpu(void);
|
||||
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
|
||||
|
||||
static void cpu_startup __P((void *));
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
|
||||
static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
|
||||
|
||||
int _udatasel, _ucodesel;
|
||||
@ -1361,7 +1365,7 @@ extern inthand_t
|
||||
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
|
||||
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
|
||||
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
|
||||
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
|
||||
IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
|
||||
|
||||
void
|
||||
sdtossd(sd, ssd)
|
||||
@ -1900,6 +1904,7 @@ init386(first)
|
||||
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
setidt(0x80, &IDTVEC(int0x80_syscall),
|
||||
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
|
||||
|
||||
@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
|
||||
*(int*)((char *)p->p_addr + off) = data;
|
||||
return (0);
|
||||
}
|
||||
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
|
||||
if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
|
||||
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
|
||||
if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
|
||||
*(int*)((char *)p->p_addr + off) = data;
|
||||
return (0);
|
||||
}
|
||||
@ -2161,12 +2166,73 @@ set_regs(p, regs)
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
static void
|
||||
fill_fpregs_xmm(sv_xmm, sv_87)
|
||||
struct savexmm *sv_xmm;
|
||||
struct save87 *sv_87;
|
||||
{
|
||||
register struct env87 *penv_87 = &sv_87->sv_env;
|
||||
register struct envxmm *penv_xmm = &sv_xmm->sv_env;
|
||||
int i;
|
||||
|
||||
/* FPU control/status */
|
||||
penv_87->en_cw = penv_xmm->en_cw;
|
||||
penv_87->en_sw = penv_xmm->en_sw;
|
||||
penv_87->en_tw = penv_xmm->en_tw;
|
||||
penv_87->en_fip = penv_xmm->en_fip;
|
||||
penv_87->en_fcs = penv_xmm->en_fcs;
|
||||
penv_87->en_opcode = penv_xmm->en_opcode;
|
||||
penv_87->en_foo = penv_xmm->en_foo;
|
||||
penv_87->en_fos = penv_xmm->en_fos;
|
||||
|
||||
/* FPU registers */
|
||||
for (i = 0; i < 8; ++i)
|
||||
sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
|
||||
|
||||
sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
|
||||
}
|
||||
|
||||
static void
|
||||
set_fpregs_xmm(sv_87, sv_xmm)
|
||||
struct save87 *sv_87;
|
||||
struct savexmm *sv_xmm;
|
||||
{
|
||||
register struct env87 *penv_87 = &sv_87->sv_env;
|
||||
register struct envxmm *penv_xmm = &sv_xmm->sv_env;
|
||||
int i;
|
||||
|
||||
/* FPU control/status */
|
||||
penv_xmm->en_cw = penv_87->en_cw;
|
||||
penv_xmm->en_sw = penv_87->en_sw;
|
||||
penv_xmm->en_tw = penv_87->en_tw;
|
||||
penv_xmm->en_fip = penv_87->en_fip;
|
||||
penv_xmm->en_fcs = penv_87->en_fcs;
|
||||
penv_xmm->en_opcode = penv_87->en_opcode;
|
||||
penv_xmm->en_foo = penv_87->en_foo;
|
||||
penv_xmm->en_fos = penv_87->en_fos;
|
||||
|
||||
/* FPU registers */
|
||||
for (i = 0; i < 8; ++i)
|
||||
sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
|
||||
|
||||
sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
|
||||
int
|
||||
fill_fpregs(p, fpregs)
|
||||
struct proc *p;
|
||||
struct fpreg *fpregs;
|
||||
{
|
||||
bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
if (cpu_fxsr) {
|
||||
fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
|
||||
(struct save87 *)fpregs);
|
||||
return (0);
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
|
||||
struct proc *p;
|
||||
struct fpreg *fpregs;
|
||||
{
|
||||
bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
|
||||
#ifdef CPU_ENABLE_SSE
|
||||
if (cpu_fxsr) {
|
||||
set_fpregs_xmm((struct save87 *)fpregs,
|
||||
&p->p_addr->u_pcb.pcb_save.sv_xmm);
|
||||
return (0);
|
||||
}
|
||||
#endif /* CPU_ENABLE_SSE */
|
||||
bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -976,7 +976,7 @@ ENTRY(i586_copyin)
|
||||
ENTRY(fastmove)
|
||||
pushl %ebp
|
||||
movl %esp,%ebp
|
||||
subl $PCB_SAVEFPU_SIZE+3*4,%esp
|
||||
subl $PCB_SAVE87_SIZE+3*4,%esp
|
||||
|
||||
movl 8(%ebp),%ecx
|
||||
cmpl $63,%ecx
|
||||
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
|
||||
movl PCPU(CURPCB),%esi
|
||||
addl $PCB_SAVEFPU,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1102,7 +1102,7 @@ fastmove_loop:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
movl -12(%ebp),%ecx
|
||||
@ -1147,7 +1147,7 @@ fastmove_fault:
|
||||
addl $PCB_SAVEFPU,%edi
|
||||
movl %esp,%esi
|
||||
cld
|
||||
movl $PCB_SAVEFPU_SIZE>>2,%ecx
|
||||
movl $PCB_SAVE87_SIZE>>2,%ecx
|
||||
rep
|
||||
movsl
|
||||
|
||||
|
@ -386,6 +386,11 @@ restart:
|
||||
ucode = T_FPOPFLT;
|
||||
i = SIGILL;
|
||||
break;
|
||||
|
||||
case T_XMMFLT: /* SIMD floating-point exception */
|
||||
ucode = 0; /* XXX */
|
||||
i = SIGFPE;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* kernel trap */
|
||||
|
@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
|
||||
p1->p_addr->u_pcb.pcb_gs = rgs();
|
||||
savecrit = critical_enter();
|
||||
if (PCPU_GET(npxproc) == p1)
|
||||
npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
|
||||
npxsave(&p1->p_addr->u_pcb.pcb_save);
|
||||
critical_exit(savecrit);
|
||||
#endif
|
||||
|
||||
|
@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
|
||||
extern u_int cpu_feature;
|
||||
extern u_int cpu_high;
|
||||
extern u_int cpu_id;
|
||||
extern u_int cpu_fxsr;
|
||||
extern char cpu_vendor[];
|
||||
extern u_int cyrix_did;
|
||||
extern char kstack[];
|
||||
|
@ -2221,6 +2221,8 @@ invltlb(void)
|
||||
* This is called once the rest of the system is up and running and we're
|
||||
* ready to let the AP's out of the pen.
|
||||
*/
|
||||
extern void enable_sse(void);
|
||||
|
||||
void
|
||||
ap_init(void)
|
||||
{
|
||||
@ -2260,6 +2262,9 @@ ap_init(void)
|
||||
/* set up FPU state on the AP */
|
||||
npxinit(__INITIAL_NPXCW__);
|
||||
|
||||
/* set up SSE registers */
|
||||
enable_sse();
|
||||
|
||||
/* A quick check from sanity claus */
|
||||
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
|
||||
if (PCPU_GET(cpuid) != apic_id) {
|
||||
|
@ -85,6 +85,42 @@ struct save87 {
|
||||
u_char sv_pad[64]; /* padding; used by emulators */
|
||||
};
|
||||
|
||||
struct envxmm {
|
||||
u_int16_t en_cw; /* control word (16bits) */
|
||||
u_int16_t en_sw; /* status word (16bits) */
|
||||
u_int16_t en_tw; /* tag word (16bits) */
|
||||
u_int16_t en_opcode; /* opcode last executed (11 bits ) */
|
||||
u_int32_t en_fip; /* floating point instruction pointer */
|
||||
u_int16_t en_fcs; /* floating code segment selector */
|
||||
u_int16_t en_pad0; /* padding */
|
||||
u_int32_t en_foo; /* floating operand offset */
|
||||
u_int16_t en_fos; /* floating operand segment selector */
|
||||
u_int16_t en_pad1; /* padding */
|
||||
u_int32_t en_mxcsr; /* SSE sontorol/status register */
|
||||
u_int32_t en_pad2; /* padding */
|
||||
};
|
||||
|
||||
/* Contents of each SSE extended accumulator */
|
||||
struct xmmacc {
|
||||
u_char xmm_bytes[16];
|
||||
};
|
||||
|
||||
struct savexmm {
|
||||
struct envxmm sv_env;
|
||||
struct {
|
||||
struct fpacc87 fp_acc;
|
||||
u_char fp_pad[6]; /* padding */
|
||||
} sv_fp[8];
|
||||
struct xmmacc sv_xmm[8];
|
||||
u_long sv_ex_sw; /* status word for last exception */
|
||||
u_char sv_pad[220];
|
||||
} __attribute__((aligned(16)));
|
||||
|
||||
union savefpu {
|
||||
struct save87 sv_87;
|
||||
struct savexmm sv_xmm;
|
||||
};
|
||||
|
||||
/*
|
||||
* The hardware default control word for i387's and later coprocessors is
|
||||
* 0x37F, giving:
|
||||
@ -108,7 +144,7 @@ struct save87 {
|
||||
int npxdna __P((void));
|
||||
void npxexit __P((struct proc *p));
|
||||
void npxinit __P((int control));
|
||||
void npxsave __P((struct save87 *addr));
|
||||
void npxsave __P((union savefpu *addr));
|
||||
int npxtrap __P((void));
|
||||
#endif
|
||||
|
||||
|
@ -62,7 +62,8 @@ struct pcb {
|
||||
int pcb_dr7;
|
||||
|
||||
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
|
||||
struct save87 pcb_savefpu; /* floating point state for 287/387 */
|
||||
union savefpu pcb_save;
|
||||
#define pcb_savefpu pcb_save.sv_87
|
||||
u_char pcb_flags;
|
||||
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
|
||||
#define PCB_DBREGS 0x02 /* process using debug registers */
|
||||
|
@ -93,6 +93,8 @@
|
||||
#define CPUID_PGE 0x2000
|
||||
#define CPUID_MCA 0x4000
|
||||
#define CPUID_CMOV 0x8000
|
||||
#define CPUID_FXSR 0x01000000
|
||||
#define CPUID_XMM 0x02000000
|
||||
|
||||
/*
|
||||
* Model-specific registers for the i386 family
|
||||
|
@ -64,7 +64,8 @@
|
||||
#define T_SEGNPFLT 26 /* segment not present fault */
|
||||
#define T_STKFLT 27 /* stack fault */
|
||||
#define T_MCHK 28 /* machine check trap */
|
||||
#define T_RESERVED 29 /* reserved (unknown) */
|
||||
#define T_XMMFLT 29 /* SIMD floating-point exception */
|
||||
#define T_RESERVED 30 /* reserved (unknown) */
|
||||
|
||||
/* XXX most of the following codes aren't used, but could be. */
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_cpu.h"
|
||||
#include "opt_debug_npx.h"
|
||||
#include "opt_math_emulate.h"
|
||||
|
||||
@ -99,6 +100,8 @@
|
||||
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
|
||||
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
|
||||
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
|
||||
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
|
||||
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
||||
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
|
||||
: : "n" (CR0_TS) : "ax")
|
||||
#define stop_emulating() __asm("clts")
|
||||
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
|
||||
void fnstsw __P((caddr_t addr));
|
||||
void fp_divide_by_0 __P((void));
|
||||
void frstor __P((caddr_t addr));
|
||||
void fxsave __P((caddr_t addr));
|
||||
void fxrstor __P((caddr_t addr));
|
||||
void start_emulating __P((void));
|
||||
void stop_emulating __P((void));
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
/*
 * Accessors for the FPU words kept in a pcb's save area.
 *
 * GET_FPU_CW(proc)	 saved control word of proc
 * GET_FPU_SW(proc)	 saved status word of proc
 * MASK_FPU_SW(proc, m)	 AND the saved status word of proc with m, in
 *			 place; the expression yields the new value
 * GET_FPU_EXSW_PTR(pcb) pointer to the pcb's "status word for last
 *			 exception" field
 *
 * With CPU_ENABLE_SSE the save area is a union, and cpu_fxsr selects at
 * run time which member (sv_xmm or sv_87) is consulted.  Without it only
 * the sv_87 member is used.
 *
 * MASK_FPU_SW must store its result: callers use it as a statement in
 * place of "en_sw &= mask", so a plain "&" would leave the saved status
 * word unchanged.
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	(cpu_fxsr ? \
		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= \
		    (mask)) : \
		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= \
		    (mask)))
#define GET_FPU_EXSW_PTR(pcb) \
	(cpu_fxsr ? \
		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
		&(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define GET_FPU_EXSW_PTR(pcb) \
	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
|
||||
|
||||
typedef u_char bool_t;
|
||||
|
||||
static int npx_attach __P((device_t dev));
|
||||
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
|
||||
#endif
|
||||
static int npx_probe __P((device_t dev));
|
||||
static int npx_probe1 __P((device_t dev));
|
||||
static void fpusave __P((union savefpu *, u_char));
|
||||
static void fpurstor __P((union savefpu *, u_char));
|
||||
#ifdef I586_CPU_XXX
|
||||
static long timezero __P((const char *funcname,
|
||||
void (*func)(void *buf, size_t len)));
|
||||
@ -529,7 +564,7 @@ void
|
||||
npxinit(control)
|
||||
u_short control;
|
||||
{
|
||||
struct save87 dummy;
|
||||
union savefpu dummy;
|
||||
critical_t savecrit;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -544,7 +579,7 @@ npxinit(control)
|
||||
stop_emulating();
|
||||
fldcw(&control);
|
||||
if (PCPU_GET(curpcb) != NULL)
|
||||
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
start_emulating();
|
||||
critical_exit(savecrit);
|
||||
}
|
||||
@ -560,7 +595,7 @@ npxexit(p)
|
||||
|
||||
savecrit = critical_enter();
|
||||
if (p == PCPU_GET(npxproc))
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
npxsave(&PCPU_GET(curpcb)->pcb_save);
|
||||
critical_exit(savecrit);
|
||||
#ifdef NPX_DEBUG
|
||||
if (npx_exists) {
|
||||
@ -773,6 +808,7 @@ npxtrap()
|
||||
{
|
||||
critical_t savecrit;
|
||||
u_short control, status;
|
||||
u_long *exstat;
|
||||
|
||||
if (!npx_exists) {
|
||||
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
|
||||
@ -787,16 +823,17 @@ npxtrap()
|
||||
* wherever they are.
|
||||
*/
|
||||
if (PCPU_GET(npxproc) != curproc) {
|
||||
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
|
||||
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
|
||||
control = GET_FPU_CW(curproc);
|
||||
status = GET_FPU_SW(curproc);
|
||||
} else {
|
||||
fnstcw(&control);
|
||||
fnstsw(&status);
|
||||
}
|
||||
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
|
||||
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
|
||||
*exstat = status;
|
||||
if (PCPU_GET(npxproc) != curproc)
|
||||
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
|
||||
MASK_FPU_SW(curproc, ~0x80bf);
|
||||
else
|
||||
fnclex();
|
||||
critical_exit(savecrit);
|
||||
@ -813,6 +850,7 @@ npxtrap()
|
||||
int
|
||||
npxdna()
|
||||
{
|
||||
u_long *exstat;
|
||||
critical_t s;
|
||||
|
||||
if (!npx_exists)
|
||||
@ -828,7 +866,9 @@ npxdna()
|
||||
* Record new context early in case frstor causes an IRQ13.
|
||||
*/
|
||||
PCPU_SET(npxproc, CURPROC);
|
||||
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
|
||||
|
||||
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
|
||||
*exstat = 0;
|
||||
/*
|
||||
* The following frstor may cause an IRQ13 when the state being
|
||||
* restored has a pending error. The error will appear to have been
|
||||
@ -841,7 +881,7 @@ npxdna()
|
||||
* fnsave are broken, so our treatment breaks fnclex if it is the
|
||||
* first FPU instruction after a context switch.
|
||||
*/
|
||||
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
|
||||
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
|
||||
critical_exit(s);
|
||||
|
||||
return (1);
|
||||
@ -872,15 +912,46 @@ npxdna()
|
||||
*/
|
||||
void
|
||||
npxsave(addr)
|
||||
struct save87 *addr;
|
||||
union savefpu *addr;
|
||||
{
|
||||
|
||||
stop_emulating();
|
||||
fnsave(addr);
|
||||
fpusave(addr, curproc->p_oncpu);
|
||||
|
||||
start_emulating();
|
||||
PCPU_SET(npxproc, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
fpusave(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
fnsave(addr);
|
||||
else {
|
||||
fxsave(&svxmm[oncpu]);
|
||||
bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fpurstor(addr, oncpu)
|
||||
union savefpu *addr;
|
||||
u_char oncpu;
|
||||
{
|
||||
static struct savexmm svxmm[MAXCPU];
|
||||
|
||||
if (!cpu_fxsr)
|
||||
frstor(addr);
|
||||
else {
|
||||
bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm));
|
||||
fxrstor(&svxmm[oncpu]);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef I586_CPU_XXX
|
||||
static long
|
||||
timezero(funcname, func)
|
||||
|
Loading…
x
Reference in New Issue
Block a user