Activate SSE/SIMD. This is the extra context switching support that we
are required to provide if we let user processes use the extra 128-bit
registers etc.

This is the base part of the diff I got from:
  http://www.issei.org/issei/FreeBSD/sse.html
I believe this is by:  Mr. SUZUKI Issei <issei@issei.org>
SMP support apparently by: Takekazu KATO <kato@chino.it.okayama-u.ac.jp>
Test code by: NAKAMURA Kazushi <kaz@kobe1995.net>, see
  http://kobe1995.net/~kaz/FreeBSD/SSE.en.html

I have fixed a couple of style(9) deviations.  I have some followup
commits to fix a couple of non-style things.
This commit is contained in:
peter 2001-07-12 06:32:51 +00:00
parent b5164c6585
commit e00129231d
42 changed files with 653 additions and 71 deletions

View File

@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
IDTVEC(xmm)
pushl $0; TRAP(T_XMMFLT)
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).

View File

@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
IDTVEC(xmm)
pushl $0; TRAP(T_XMMFLT)
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).

View File

@ -35,6 +35,7 @@
* $FreeBSD$
*/
#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
void fxsave __P((caddr_t addr));
void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
/*
 * Accessors for the FPU state saved in a process's pcb_save union.
 *
 * With CPU_ENABLE_SSE, the run-time flag cpu_fxsr selects which member of
 * the union is live: sv_xmm when it is non-zero, sv_87 otherwise.  Without
 * CPU_ENABLE_SSE only sv_87 is ever used.
 *
 *   GET_FPU_CW(proc)        saved control word (rvalue)
 *   GET_FPU_SW(proc)        saved status word (rvalue)
 *   MASK_FPU_SW(proc, mask) AND the saved status word with mask IN PLACE
 *                           and yield the new value
 *   GET_FPU_EXSW_PTR(pcb)   pointer to the saved "status word for last
 *                           exception"
 *
 * MASK_FPU_SW stands in for the former open-coded "en_sw &= ~0x80bf" in
 * npxtrap(), so it must store the result; each arm of the conditional is
 * parenthesised because "c ? a &= m : b &= m" does not parse as two
 * assignments in C.
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	(cpu_fxsr ? \
		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
#define GET_FPU_EXSW_PTR(pcb) \
	(cpu_fxsr ? \
		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
		&(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define GET_FPU_EXSW_PTR(pcb) \
	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
static void fpusave __P((union savefpu *, u_char));
static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
struct save87 dummy;
union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
control = GET_FPU_CW(curproc);
status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
*exstat = status;
if (PCPU_GET(npxproc) != curproc)
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
u_long *exstat;
critical_t s;
if (!npx_exists)
@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
*exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
struct save87 *addr;
union savefpu *addr;
{
stop_emulating();
fnsave(addr);
fpusave(addr, curproc->p_oncpu);
start_emulating();
PCPU_SET(npxproc, NULL);
}
/*
 * Store the current FPU state into *addr.
 *
 * When cpu_fxsr is zero the state is written straight to addr with fnsave.
 * Otherwise it is written with fxsave into the slot of a function-local
 * static array selected by oncpu, and then copied out to addr with bcopy.
 * oncpu is used as an index into an array of MAXCPU entries and is not
 * range-checked here.
 *
 * NOTE(review): the intermediate buffer presumably exists because struct
 * savexmm is declared 16-byte aligned while addr may not be -- confirm.
 */
static void
fpusave(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *bounce;

	if (cpu_fxsr) {
		bounce = &svxmm[oncpu];
		fxsave(bounce);
		bcopy(bounce, addr, sizeof(struct savexmm));
		return;
	}
	fnsave(addr);
}
/*
 * Load the FPU state from *addr.
 *
 * When cpu_fxsr is zero the state is loaded straight from addr with frstor.
 * Otherwise sizeof(struct savexmm) bytes are first copied from addr into
 * the slot of a function-local static array selected by oncpu, and fxrstor
 * is executed on that slot.  oncpu is used as an index into an array of
 * MAXCPU entries and is not range-checked here.
 *
 * NOTE(review): the intermediate buffer presumably exists because struct
 * savexmm is declared 16-byte aligned while addr may not be -- confirm.
 */
static void
fpurstor(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *bounce;

	if (cpu_fxsr) {
		bounce = &svxmm[oncpu];
		bcopy(addr, bounce, sizeof (struct savexmm));
		fxrstor(bounce);
		return;
	}
	frstor(addr);
}
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)

View File

@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
#ifdef SMP

View File

@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@ -61,8 +62,14 @@ static void init_6x86(void);
static void init_6x86MX(void);
static void init_ppro(void);
static void init_mendocino(void);
/* Takes no arguments; declared as a full prototype to match its definition. */
void enable_sse(void);
#endif
int hw_instruction_sse = 0;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
&hw_instruction_sse, 0,
"SIMD/MMX2 instructions available in CPU");
#ifdef I486_CPU
/*
* IBM Blue Lightning
@ -501,6 +508,20 @@ init_mendocino(void)
#endif /* CPU_PPRO2CELERON */
}
/*
 * Enable SSE on the current CPU.
 *
 * Only does anything when the kernel is built with CPU_ENABLE_SSE and
 * cpu_feature reports both CPUID_XMM and CPUID_FXSR.  In that case the
 * CR4_FXSR and CR4_XMM bits are set in CR4 (Control register 4), and both
 * hw_instruction_sse and cpu_fxsr are set to 1.
 */
void
enable_sse(void)
{
#ifdef CPU_ENABLE_SSE
	/* Both feature bits must be present; otherwise leave CR4 alone. */
	if ((cpu_feature & (CPUID_XMM | CPUID_FXSR)) !=
	    (CPUID_XMM | CPUID_FXSR))
		return;

	load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
	hw_instruction_sse = 1;
	cpu_fxsr = 1;
#endif
}
#endif /* I686_CPU */
void
@ -544,6 +565,7 @@ initializecpu(void)
init_mendocino();
break;
}
enable_sse();
}
break;
#endif

View File

@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
.globl cpu_high, cpu_feature
.globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */

View File

@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
.globl cpu_high, cpu_feature
.globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */

View File

@ -127,6 +127,10 @@ extern void initializecpu(void);
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
static void cpu_startup __P((void *));
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
int _udatasel, _ucodesel;
@ -1361,7 +1365,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
void
sdtossd(sd, ssd)
@ -1900,6 +1904,7 @@ init386(first)
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(0x80, &IDTVEC(int0x80_syscall),
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
@ -2161,12 +2166,73 @@ set_regs(p, regs)
return (0);
}
#ifdef CPU_ENABLE_SSE
/*
 * Build a struct save87 image from a struct savexmm image.
 *
 * Copies the control, status and tag words, the instruction and operand
 * pointers, the opcode, the eight floating point accumulators and the
 * saved exception status word from *sv_xmm into *sv_87.  en_mxcsr and the
 * sv_xmm[] registers have no counterpart in the destination and are not
 * copied.
 *
 * NOTE(review): en_tw is copied verbatim; confirm that the tag word has
 * the same encoding in both save formats.
 */
static void
fill_fpregs_xmm(sv_xmm, sv_87)
	struct savexmm *sv_xmm;
	struct save87 *sv_87;
{
	struct envxmm *src = &sv_xmm->sv_env;
	struct env87 *dst = &sv_87->sv_env;
	int reg;

	/* Environment: control/status/tag words, pointers, opcode. */
	dst->en_cw = src->en_cw;
	dst->en_sw = src->en_sw;
	dst->en_tw = src->en_tw;
	dst->en_fip = src->en_fip;
	dst->en_fcs = src->en_fcs;
	dst->en_opcode = src->en_opcode;
	dst->en_foo = src->en_foo;
	dst->en_fos = src->en_fos;

	/* The eight accumulators; the per-entry padding is dropped. */
	for (reg = 0; reg < 8; reg++)
		sv_87->sv_ac[reg] = sv_xmm->sv_fp[reg].fp_acc;

	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
/*
 * Update a struct savexmm image from a struct save87 image.
 *
 * Copies the control, status and tag words, the instruction and operand
 * pointers, the opcode, the eight floating point accumulators and the
 * saved exception status word from *sv_87 into *sv_xmm.  Every other part
 * of *sv_xmm (en_mxcsr, the sv_xmm[] registers, padding) is left as it was.
 *
 * NOTE(review): en_tw is copied verbatim; confirm that the tag word has
 * the same encoding in both save formats.
 */
static void
set_fpregs_xmm(sv_87, sv_xmm)
	struct save87 *sv_87;
	struct savexmm *sv_xmm;
{
	struct env87 *src = &sv_87->sv_env;
	struct envxmm *dst = &sv_xmm->sv_env;
	int reg;

	/* Environment: control/status/tag words, pointers, opcode. */
	dst->en_cw = src->en_cw;
	dst->en_sw = src->en_sw;
	dst->en_tw = src->en_tw;
	dst->en_fip = src->en_fip;
	dst->en_fcs = src->en_fcs;
	dst->en_opcode = src->en_opcode;
	dst->en_foo = src->en_foo;
	dst->en_fos = src->en_fos;

	/* The eight accumulators; the per-entry padding is not touched. */
	for (reg = 0; reg < 8; reg++)
		sv_xmm->sv_fp[reg].fp_acc = sv_87->sv_ac[reg];

	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_ENABLE_SSE */
int
fill_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
(struct save87 *)fpregs);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
set_fpregs_xmm((struct save87 *)fpregs,
&p->p_addr->u_pcb.pcb_save.sv_xmm);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
return (0);
}

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
subl $PCB_SAVEFPU_SIZE+3*4,%esp
subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl

View File

@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
subl $PCB_SAVEFPU_SIZE+3*4,%esp
subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl

View File

@ -386,6 +386,11 @@ restart:
ucode = T_FPOPFLT;
i = SIGILL;
break;
case T_XMMFLT: /* SIMD floating-point exception */
ucode = 0; /* XXX */
i = SIGFPE;
break;
}
} else {
/* kernel trap */

View File

@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
p1->p_addr->u_pcb.pcb_gs = rgs();
savecrit = critical_enter();
if (PCPU_GET(npxproc) == p1)
npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
npxsave(&p1->p_addr->u_pcb.pcb_save);
critical_exit(savecrit);
#endif

View File

@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
/*
* Environment part of struct savexmm (its sv_env member): the x87
* control/status/tag words and instruction/operand pointers, plus the
* SSE control/status register.
*/
struct envxmm {
u_int16_t en_cw; /* control word (16bits) */
u_int16_t en_sw; /* status word (16bits) */
u_int16_t en_tw; /* tag word (16bits) */
u_int16_t en_opcode; /* opcode last executed (11 bits ) */
u_int32_t en_fip; /* floating point instruction pointer */
u_int16_t en_fcs; /* floating code segment selector */
u_int16_t en_pad0; /* padding */
u_int32_t en_foo; /* floating operand offset */
u_int16_t en_fos; /* floating operand segment selector */
u_int16_t en_pad1; /* padding */
u_int32_t en_mxcsr; /* SSE control/status register */
u_int32_t en_pad2; /* padding */
};
/* Contents of each SSE extended accumulator, held as 16 raw bytes */
struct xmmacc {
u_char xmm_bytes[16];
};
/*
* Save area used with the fxsave/fxrstor instructions (see fpusave() and
* fpurstor()).  The type is declared 16-byte aligned.
*/
struct savexmm {
struct envxmm sv_env; /* control/status environment */
struct {
struct fpacc87 fp_acc;
u_char fp_pad[6]; /* padding */
} sv_fp[8]; /* the eight floating point accumulators */
struct xmmacc sv_xmm[8]; /* the eight SSE accumulators */
u_long sv_ex_sw; /* status word for last exception */
u_char sv_pad[220]; /* presumably pads to the full save-area size -- TODO confirm */
} __attribute__((aligned(16)));
/*
* FPU save area that can hold either layout.  fpusave()/fpurstor() use
* sv_87 (fnsave/frstor) when cpu_fxsr is zero and sv_xmm (fxsave/fxrstor)
* when it is non-zero.
*/
union savefpu {
struct save87 sv_87;
struct savexmm sv_xmm;
};
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
void npxsave __P((struct save87 *addr));
void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif

View File

@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
extern u_int cpu_feature;
extern u_int cpu_high;
extern u_int cpu_id;
extern u_int cpu_fxsr;
extern char cpu_vendor[];
extern u_int cyrix_did;
extern char kstack[];

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
/*
* Environment part of struct savexmm (its sv_env member): the x87
* control/status/tag words and instruction/operand pointers, plus the
* SSE control/status register.
*/
struct envxmm {
u_int16_t en_cw; /* control word (16bits) */
u_int16_t en_sw; /* status word (16bits) */
u_int16_t en_tw; /* tag word (16bits) */
u_int16_t en_opcode; /* opcode last executed (11 bits ) */
u_int32_t en_fip; /* floating point instruction pointer */
u_int16_t en_fcs; /* floating code segment selector */
u_int16_t en_pad0; /* padding */
u_int32_t en_foo; /* floating operand offset */
u_int16_t en_fos; /* floating operand segment selector */
u_int16_t en_pad1; /* padding */
u_int32_t en_mxcsr; /* SSE control/status register */
u_int32_t en_pad2; /* padding */
};
/* Contents of each SSE extended accumulator, held as 16 raw bytes */
struct xmmacc {
u_char xmm_bytes[16];
};
/*
* Save area used with the fxsave/fxrstor instructions (see fpusave() and
* fpurstor()).  The type is declared 16-byte aligned.
*/
struct savexmm {
struct envxmm sv_env; /* control/status environment */
struct {
struct fpacc87 fp_acc;
u_char fp_pad[6]; /* padding */
} sv_fp[8]; /* the eight floating point accumulators */
struct xmmacc sv_xmm[8]; /* the eight SSE accumulators */
u_long sv_ex_sw; /* status word for last exception */
u_char sv_pad[220]; /* presumably pads to the full save-area size -- TODO confirm */
} __attribute__((aligned(16)));
/*
* FPU save area that can hold either layout.  fpusave()/fpurstor() use
* sv_87 (fnsave/frstor) when cpu_fxsr is zero and sv_xmm (fxsave/fxrstor)
* when it is non-zero.
*/
union savefpu {
struct save87 sv_87;
struct savexmm sv_xmm;
};
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
void npxsave __P((struct save87 *addr));
void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif

View File

@ -62,7 +62,8 @@ struct pcb {
int pcb_dr7;
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
struct save87 pcb_savefpu; /* floating point state for 287/387 */
union savefpu pcb_save;
#define pcb_savefpu pcb_save.sv_87
u_char pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */

View File

@ -93,6 +93,8 @@
#define CPUID_PGE 0x2000
#define CPUID_MCA 0x4000
#define CPUID_CMOV 0x8000
#define CPUID_FXSR 0x01000000
#define CPUID_XMM 0x02000000
/*
* Model-specific registers for the i386 family

View File

@ -64,7 +64,8 @@
#define T_SEGNPFLT 26 /* segment not present fault */
#define T_STKFLT 27 /* stack fault */
#define T_MCHK 28 /* machine check trap */
#define T_RESERVED 29 /* reserved (unknown) */
#define T_XMMFLT 29 /* SIMD floating-point exception */
#define T_RESERVED 30 /* reserved (unknown) */
/* XXX most of the following codes aren't used, but could be. */

View File

@ -35,6 +35,7 @@
* $FreeBSD$
*/
#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
void fxsave __P((caddr_t addr));
void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
/*
 * Accessors for the FPU state saved in a process's pcb_save union.
 *
 * With CPU_ENABLE_SSE, the run-time flag cpu_fxsr selects which member of
 * the union is live: sv_xmm when it is non-zero, sv_87 otherwise.  Without
 * CPU_ENABLE_SSE only sv_87 is ever used.
 *
 *   GET_FPU_CW(proc)        saved control word (rvalue)
 *   GET_FPU_SW(proc)        saved status word (rvalue)
 *   MASK_FPU_SW(proc, mask) AND the saved status word with mask IN PLACE
 *                           and yield the new value
 *   GET_FPU_EXSW_PTR(pcb)   pointer to the saved "status word for last
 *                           exception"
 *
 * MASK_FPU_SW stands in for the former open-coded "en_sw &= ~0x80bf" in
 * npxtrap(), so it must store the result; each arm of the conditional is
 * parenthesised because "c ? a &= m : b &= m" does not parse as two
 * assignments in C.
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	(cpu_fxsr ? \
		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
#define GET_FPU_EXSW_PTR(pcb) \
	(cpu_fxsr ? \
		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
		&(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define GET_FPU_SW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define MASK_FPU_SW(proc, mask) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define GET_FPU_EXSW_PTR(pcb) \
	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
static void fpusave __P((union savefpu *, u_char));
static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
struct save87 dummy;
union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
control = GET_FPU_CW(curproc);
status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
*exstat = status;
if (PCPU_GET(npxproc) != curproc)
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
u_long *exstat;
critical_t s;
if (!npx_exists)
@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
*exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
struct save87 *addr;
union savefpu *addr;
{
stop_emulating();
fnsave(addr);
fpusave(addr, curproc->p_oncpu);
start_emulating();
PCPU_SET(npxproc, NULL);
}
/*
 * Store the current FPU state into *addr.
 *
 * When cpu_fxsr is zero the state is written straight to addr with fnsave.
 * Otherwise it is written with fxsave into the slot of a function-local
 * static array selected by oncpu, and then copied out to addr with bcopy.
 * oncpu is used as an index into an array of MAXCPU entries and is not
 * range-checked here.
 *
 * NOTE(review): the intermediate buffer presumably exists because struct
 * savexmm is declared 16-byte aligned while addr may not be -- confirm.
 */
static void
fpusave(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *bounce;

	if (cpu_fxsr) {
		bounce = &svxmm[oncpu];
		fxsave(bounce);
		bcopy(bounce, addr, sizeof(struct savexmm));
		return;
	}
	fnsave(addr);
}
/*
 * Load the FPU state from *addr.
 *
 * When cpu_fxsr is zero the state is loaded straight from addr with frstor.
 * Otherwise sizeof(struct savexmm) bytes are first copied from addr into
 * the slot of a function-local static array selected by oncpu, and fxrstor
 * is executed on that slot.  oncpu is used as an index into an array of
 * MAXCPU entries and is not range-checked here.
 *
 * NOTE(review): the intermediate buffer presumably exists because struct
 * savexmm is declared 16-byte aligned while addr may not be -- confirm.
 */
static void
fpurstor(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *bounce;

	if (cpu_fxsr) {
		bounce = &svxmm[oncpu];
		bcopy(addr, bounce, sizeof (struct savexmm));
		fxrstor(bounce);
		return;
	}
	frstor(addr);
}
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)

View File

@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
# CPU_ENABLE_SSE enables support for SSE/MMX2 instructions.
#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
options CPU_BTB_EN
options CPU_DIRECT_MAPPED_CACHE
options CPU_DISABLE_5X86_LSSER
options CPU_ENABLE_SSE
options CPU_FASTER_5X86_FPU
options CPU_I486_ON_386
options CPU_IORT

View File

@ -59,6 +59,7 @@ CPU_WT_ALLOC opt_cpu.h
CYRIX_CACHE_WORKS opt_cpu.h
CYRIX_CACHE_REALLY_WORKS opt_cpu.h
NO_MEMORY_HOLE opt_cpu.h
CPU_ENABLE_SSE opt_cpu.h
# The CPU type affects the endian conversion functions all over the kernel.
I386_CPU opt_global.h

View File

@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
# CPU_ENABLE_SSE enables support for SSE/MMX2 instructions.
#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
options CPU_BTB_EN
options CPU_DIRECT_MAPPED_CACHE
options CPU_DISABLE_5X86_LSSER
options CPU_ENABLE_SSE
options CPU_FASTER_5X86_FPU
options CPU_I486_ON_386
options CPU_IORT

View File

@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
IDTVEC(xmm)
pushl $0; TRAP(T_XMMFLT)
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).

View File

@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
#ifdef SMP

View File

@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@ -61,8 +62,14 @@ static void init_6x86(void);
static void init_6x86MX(void);
static void init_ppro(void);
static void init_mendocino(void);
/* Takes no arguments; declared as a full prototype to match its definition. */
void enable_sse(void);
#endif
int hw_instruction_sse = 0;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
&hw_instruction_sse, 0,
"SIMD/MMX2 instructions available in CPU");
#ifdef I486_CPU
/*
* IBM Blue Lightning
@ -501,6 +508,20 @@ init_mendocino(void)
#endif /* CPU_PPRO2CELERON */
}
/*
 * Enable SSE on the current CPU.
 *
 * Only does anything when the kernel is built with CPU_ENABLE_SSE and
 * cpu_feature reports both CPUID_XMM and CPUID_FXSR.  In that case the
 * CR4_FXSR and CR4_XMM bits are set in CR4 (Control register 4), and both
 * hw_instruction_sse and cpu_fxsr are set to 1.
 */
void
enable_sse(void)
{
#ifdef CPU_ENABLE_SSE
	/* Both feature bits must be present; otherwise leave CR4 alone. */
	if ((cpu_feature & (CPUID_XMM | CPUID_FXSR)) !=
	    (CPUID_XMM | CPUID_FXSR))
		return;

	load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
	hw_instruction_sse = 1;
	cpu_fxsr = 1;
#endif
}
#endif /* I686_CPU */
void
@ -544,6 +565,7 @@ initializecpu(void)
init_mendocino();
break;
}
enable_sse();
}
break;
#endif

View File

@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
.globl cpu_high, cpu_feature
.globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */

View File

@ -127,6 +127,10 @@ extern void initializecpu(void);
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
static void cpu_startup __P((void *));
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
int _udatasel, _ucodesel;
@ -1361,7 +1365,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
void
sdtossd(sd, ssd)
@ -1900,6 +1904,7 @@ init386(first)
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(0x80, &IDTVEC(int0x80_syscall),
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
@ -2161,12 +2166,73 @@ set_regs(p, regs)
return (0);
}
#ifdef CPU_ENABLE_SSE
/*
 * Build a struct save87 image from a struct savexmm image.
 *
 * Copies the control, status and tag words, the instruction and operand
 * pointers, the opcode, the eight floating point accumulators and the
 * saved exception status word from *sv_xmm into *sv_87.  en_mxcsr and the
 * sv_xmm[] registers have no counterpart in the destination and are not
 * copied.
 *
 * NOTE(review): en_tw is copied verbatim; confirm that the tag word has
 * the same encoding in both save formats.
 */
static void
fill_fpregs_xmm(sv_xmm, sv_87)
	struct savexmm *sv_xmm;
	struct save87 *sv_87;
{
	struct envxmm *src = &sv_xmm->sv_env;
	struct env87 *dst = &sv_87->sv_env;
	int reg;

	/* Environment: control/status/tag words, pointers, opcode. */
	dst->en_cw = src->en_cw;
	dst->en_sw = src->en_sw;
	dst->en_tw = src->en_tw;
	dst->en_fip = src->en_fip;
	dst->en_fcs = src->en_fcs;
	dst->en_opcode = src->en_opcode;
	dst->en_foo = src->en_foo;
	dst->en_fos = src->en_fos;

	/* The eight accumulators; the per-entry padding is dropped. */
	for (reg = 0; reg < 8; reg++)
		sv_87->sv_ac[reg] = sv_xmm->sv_fp[reg].fp_acc;

	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
}
/*
 * Update a struct savexmm image from a struct save87 image.
 *
 * Copies the control, status and tag words, the instruction and operand
 * pointers, the opcode, the eight floating point accumulators and the
 * saved exception status word from *sv_87 into *sv_xmm.  Every other part
 * of *sv_xmm (en_mxcsr, the sv_xmm[] registers, padding) is left as it was.
 *
 * NOTE(review): en_tw is copied verbatim; confirm that the tag word has
 * the same encoding in both save formats.
 */
static void
set_fpregs_xmm(sv_87, sv_xmm)
	struct save87 *sv_87;
	struct savexmm *sv_xmm;
{
	struct env87 *src = &sv_87->sv_env;
	struct envxmm *dst = &sv_xmm->sv_env;
	int reg;

	/* Environment: control/status/tag words, pointers, opcode. */
	dst->en_cw = src->en_cw;
	dst->en_sw = src->en_sw;
	dst->en_tw = src->en_tw;
	dst->en_fip = src->en_fip;
	dst->en_fcs = src->en_fcs;
	dst->en_opcode = src->en_opcode;
	dst->en_foo = src->en_foo;
	dst->en_fos = src->en_fos;

	/* The eight accumulators; the per-entry padding is not touched. */
	for (reg = 0; reg < 8; reg++)
		sv_xmm->sv_fp[reg].fp_acc = sv_87->sv_ac[reg];

	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
}
#endif /* CPU_ENABLE_SSE */
int
fill_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
(struct save87 *)fpregs);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
set_fpregs_xmm((struct save87 *)fpregs,
&p->p_addr->u_pcb.pcb_save.sv_xmm);
return (0);
}
#endif /* CPU_ENABLE_SSE */
bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
return (0);
}

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
subl $PCB_SAVEFPU_SIZE+3*4,%esp
subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
movl $PCB_SAVEFPU_SIZE>>2,%ecx
movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl

View File

@ -386,6 +386,11 @@ restart:
ucode = T_FPOPFLT;
i = SIGILL;
break;
case T_XMMFLT: /* SIMD floating-point exception */
ucode = 0; /* XXX */
i = SIGFPE;
break;
}
} else {
/* kernel trap */

View File

@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
p1->p_addr->u_pcb.pcb_gs = rgs();
savecrit = critical_enter();
if (PCPU_GET(npxproc) == p1)
npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
npxsave(&p1->p_addr->u_pcb.pcb_save);
critical_exit(savecrit);
#endif

View File

@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
extern u_int cpu_feature;
extern u_int cpu_high;
extern u_int cpu_id;
extern u_int cpu_fxsr;
extern char cpu_vendor[];
extern u_int cyrix_did;
extern char kstack[];

View File

@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
extern void enable_sse(void);
void
ap_init(void)
{
@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
/* set up SSE registers */
enable_sse();
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {

View File

@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
/*
 * Environment (control/status) part of the extended FPU save area; it is
 * the first member of struct savexmm.  The fixed-width fields below add up
 * to 32 bytes.
 */
struct envxmm {
u_int16_t en_cw; /* control word (16bits) */
u_int16_t en_sw; /* status word (16bits) */
u_int16_t en_tw; /* tag word (16bits) */
u_int16_t en_opcode; /* opcode last executed (11 bits ) */
u_int32_t en_fip; /* floating point instruction pointer */
u_int16_t en_fcs; /* floating code segment selector */
u_int16_t en_pad0; /* padding */
u_int32_t en_foo; /* floating operand offset */
u_int16_t en_fos; /* floating operand segment selector */
u_int16_t en_pad1; /* padding */
u_int32_t en_mxcsr; /* SSE control/status register */
u_int32_t en_pad2; /* padding */
};
/*
 * Contents of each SSE extended accumulator: one 128-bit (16-byte) register
 * image, kept as raw bytes.
 */
struct xmmacc {
u_char xmm_bytes[16];
};
/*
 * Extended FPU/SSE save area.  In order: the environment, eight x87
 * accumulators (each followed by 6 bytes of padding), eight XMM registers,
 * the status word recorded for the last exception, and trailing padding.
 * The structure is forced to 16-byte alignment by the attribute below.
 *
 * NOTE(review): sv_pad[220] presumably rounds the structure up to the size
 * of the hardware save image; that depends on sizeof(struct fpacc87) and
 * sizeof(u_long), which are not visible here -- confirm.
 */
struct savexmm {
struct envxmm sv_env;
struct {
struct fpacc87 fp_acc;
u_char fp_pad[6]; /* padding */
} sv_fp[8];
struct xmmacc sv_xmm[8];
u_long sv_ex_sw; /* status word for last exception */
u_char sv_pad[220];
} __attribute__((aligned(16)));
/*
 * Storage for saved FPU state in either layout.  Both members share the
 * same memory, so only one of them is meaningful at a time.
 */
union savefpu {
struct save87 sv_87; /* x87-only layout */
struct savexmm sv_xmm; /* extended layout including the XMM registers */
};
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
void npxsave __P((struct save87 *addr));
void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif

View File

@ -62,7 +62,8 @@ struct pcb {
int pcb_dr7;
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
struct save87 pcb_savefpu; /* floating point state for 287/387 */
union savefpu pcb_save;
#define pcb_savefpu pcb_save.sv_87
u_char pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */

View File

@ -93,6 +93,8 @@
#define CPUID_PGE 0x2000
#define CPUID_MCA 0x4000
#define CPUID_CMOV 0x8000
#define CPUID_FXSR 0x01000000
#define CPUID_XMM 0x02000000
/*
* Model-specific registers for the i386 family

View File

@ -64,7 +64,8 @@
#define T_SEGNPFLT 26 /* segment not present fault */
#define T_STKFLT 27 /* stack fault */
#define T_MCHK 28 /* machine check trap */
#define T_RESERVED 29 /* reserved (unknown) */
#define T_XMMFLT 29 /* SIMD floating-point exception */
#define T_RESERVED 30 /* reserved (unknown) */
/* XXX most of the following codes aren't used, but could be. */

View File

@ -35,6 +35,7 @@
* $FreeBSD$
*/
#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
void fxsave __P((caddr_t addr));
void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
/*
 * Accessors for the FPU state saved in a process's pcb.
 *
 * With CPU_ENABLE_SSE the saved state is in one of two layouts, selected at
 * run time by cpu_fxsr: pcb_save.sv_xmm when it is non-zero, pcb_save.sv_87
 * otherwise.  Without CPU_ENABLE_SSE only pcb_save.sv_87 is used.
 *
 *	GET_FPU_CW(proc)	saved control word of `proc'
 *	GET_FPU_SW(proc)	saved status word of `proc'
 *	MASK_FPU_SW(proc, mask)	AND the saved status word of `proc' with
 *				`mask' IN PLACE; evaluates to the new value
 *	GET_FPU_EXSW_PTR(pcb)	pointer to the "status word at last
 *				exception" slot of `pcb'
 *
 * MASK_FPU_SW must store its result: it stands in for the former
 * "en_sw &= ~0x80bf" statement, so a plain `&' would silently leave the
 * saved status word unchanged.
 *
 * Every macro argument is parenthesized so that expressions such as
 * `&some_proc' can be passed safely.
 */
#ifdef CPU_ENABLE_SSE
#define	GET_FPU_CW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define	GET_FPU_SW(proc) \
	(cpu_fxsr ? \
		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define	MASK_FPU_SW(proc, mask) \
	(cpu_fxsr ? \
		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
#define	GET_FPU_EXSW_PTR(pcb) \
	(cpu_fxsr ? \
		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
		&(pcb)->pcb_save.sv_87.sv_ex_sw)
#else /* CPU_ENABLE_SSE */
#define	GET_FPU_CW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
#define	GET_FPU_SW(proc) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
#define	MASK_FPU_SW(proc, mask) \
	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
#define	GET_FPU_EXSW_PTR(pcb) \
	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
#endif /* CPU_ENABLE_SSE */
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
static void fpusave __P((union savefpu *, u_char));
static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
struct save87 dummy;
union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
control = GET_FPU_CW(curproc);
status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
*exstat = status;
if (PCPU_GET(npxproc) != curproc)
curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
u_long *exstat;
critical_t s;
if (!npx_exists)
@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
*exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
frstor(&PCPU_GET(curpcb)->pcb_savefpu);
fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
struct save87 *addr;
union savefpu *addr;
{
stop_emulating();
fnsave(addr);
fpusave(addr, curproc->p_oncpu);
start_emulating();
PCPU_SET(npxproc, NULL);
}
/*
 * Store the current FPU state into *addr.
 *
 * When cpu_fxsr is clear the state is written directly with fnsave.
 * Otherwise it is written with fxsave into a static per-CPU staging slot
 * (selected by `oncpu') and then copied to *addr.
 *
 * NOTE(review): the staging slot is presumably there because struct savexmm
 * is declared 16-byte aligned while *addr may not be -- confirm.
 * `oncpu' is used as an array index without a range check; callers are
 * assumed to pass a value below MAXCPU.
 */
static void
fpusave(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *slot;

	if (cpu_fxsr) {
		slot = &svxmm[oncpu];
		fxsave(slot);
		bcopy(slot, addr, sizeof(struct savexmm));
		return;
	}
	fnsave(addr);
}
/*
 * Load the FPU state from *addr.
 *
 * When cpu_fxsr is clear the state is loaded directly with frstor.
 * Otherwise *addr is first copied into a static per-CPU staging slot
 * (selected by `oncpu') and loaded from there with fxrstor.
 *
 * NOTE(review): the staging slot is presumably there because struct savexmm
 * is declared 16-byte aligned while *addr may not be -- confirm.
 * `oncpu' is used as an array index without a range check; callers are
 * assumed to pass a value below MAXCPU.
 */
static void
fpurstor(addr, oncpu)
	union savefpu *addr;
	u_char oncpu;
{
	static struct savexmm svxmm[MAXCPU];
	struct savexmm *slot;

	if (cpu_fxsr) {
		slot = &svxmm[oncpu];
		bcopy(addr, slot, sizeof (struct savexmm));
		fxrstor(slot);
		return;
	}
	frstor(addr);
}
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)