For i386, remove config options CPU_DISABLE_CMPXCHG, CPU_DISABLE_SSE and device npx.

This means that the FPU is always initialized and handled when available,
and the SSE+ register file and exception are handled when available.  This
makes the kernel FPU code much easier to maintain at the cost of
slight bloat for CPUs older than 25 years.

CPU_DISABLE_CMPXCHG outlived its usefulness, see the removed comment
explaining the original purpose.

Suggested by and discussed with:	bde
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
This commit is contained in:
Konstantin Belousov 2017-02-03 12:51:40 +00:00
parent 5b50ed8043
commit 57f6622f92
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=313154
22 changed files with 6 additions and 268 deletions

View File

@ -530,7 +530,7 @@ i386/ibcs2/ibcs2_xenix.c optional ibcs2
i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2
i386/ibcs2/imgact_coff.c optional ibcs2
i386/isa/elink.c optional ep | ie
i386/isa/npx.c optional npx
i386/isa/npx.c standard
i386/isa/pmtimer.c optional pmtimer
i386/isa/prof_machdep.c optional profiling-routine
i386/linux/imgact_linux.c optional compat_linux

View File

@ -50,8 +50,6 @@ CPU_BTB_EN opt_cpu.h
CPU_CYRIX_NO_LOCK opt_cpu.h
CPU_DIRECT_MAPPED_CACHE opt_cpu.h
CPU_DISABLE_5X86_LSSER opt_cpu.h
CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_*
CPU_DISABLE_SSE opt_cpu.h
CPU_ELAN opt_cpu.h
CPU_ELAN_PPS opt_cpu.h
CPU_ELAN_XTAL opt_cpu.h
@ -113,7 +111,6 @@ NETGRAPH_CRONYX opt_ng_cronyx.h
# Device options
DEV_APIC opt_apic.h
DEV_ATPIC opt_atpic.h
DEV_NPX opt_npx.h
# Debugging
NPX_DEBUG opt_npx.h

View File

@ -9,9 +9,6 @@ machine i386
device isa
options ISAPNP
# Floating point support.
device npx
# Pseudo devices.
device mem # Memory and kernel memory devices
device io # I/O device

View File

@ -115,15 +115,6 @@ cpu I686_CPU # aka Pentium Pro(tm)
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
# CPU_DISABLE_CMPXCHG disables the CMPXCHG instruction on > i386 IA32
# machines. VmWare 3.x seems to emulate this instruction poorly, causing
# the guest OS to run very slowly. This problem appears to be fixed in
# VmWare 4.x, at least in version 4.5.2, so that enabling this option with
# VmWare 4.x will result in locking operations to be 20-30 times slower.
# Enabling this with an SMP kernel will cause the kernel to be unusable.
#
# CPU_DISABLE_SSE explicitly prevents I686_CPU from turning on SSE.
#
# CPU_ELAN enables support for AMDs ElanSC520 CPU.
# CPU_ELAN_PPS enables precision timestamp code.
# CPU_ELAN_XTAL sets the clock crystal frequency in Hz.
@ -201,8 +192,6 @@ options CPU_BLUELIGHTNING_FPU_OP_CACHE
options CPU_BTB_EN
options CPU_DIRECT_MAPPED_CACHE
options CPU_DISABLE_5X86_LSSER
options CPU_DISABLE_CMPXCHG
#options CPU_DISABLE_SSE
options CPU_ELAN
options CPU_ELAN_PPS
options CPU_ELAN_XTAL=32768000
@ -313,7 +302,7 @@ device apm_saver # Requires APM
#
# ISA bus
#
device isa # Required by npx(4)
device isa
#
# Options for `isa':
@ -395,8 +384,7 @@ device dpms # DPMS suspend & resume via VESA BIOS
options X86BIOS
#
# The Numeric Processing eXtension driver. This is non-optional.
device npx
# Hints for the non-optional Numeric Processing eXtension driver.
hint.npx.0.flags="0x0"
hint.npx.0.irq="13"

View File

@ -49,10 +49,6 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/npx.h>
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
struct sysentvec elf32_freebsd_sysvec = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
@ -143,13 +139,10 @@ SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY,
void
elf32_dump_thread(struct thread *td, void *dst, size_t *off)
{
#ifdef CPU_ENABLE_SSE
void *buf;
#endif
size_t len;
len = 0;
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
if (dst != NULL) {
npxgetregs(td);
@ -162,7 +155,6 @@ elf32_dump_thread(struct thread *td, void *dst, size_t *off)
len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL,
cpu_max_ext_state_size, NULL);
}
#endif
*off = len;
}

View File

@ -37,7 +37,6 @@
#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
#include "opt_npx.h"
#include <machine/asmacros.h>
#include <machine/psl.h>

View File

@ -44,10 +44,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#ifdef I486_CPU
static void init_5x86(void);
static void init_bluelightning(void);
@ -742,12 +738,10 @@ initializecpu(void)
default:
break;
}
#if defined(CPU_ENABLE_SSE)
if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
cpu_fxsr = hw_instruction_sse = 1;
}
#endif
#if defined(PAE) || defined(PAE_TABLES)
if ((amd_feature & AMDID_NX) != 0) {
uint64_t msr;

View File

@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_mp_watchdog.h"
#include "opt_npx.h"
#include "opt_perfmon.h"
#include "opt_platform.h"
#include "opt_xbox.h"
@ -162,10 +161,6 @@ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
extern register_t init386(int first);
extern void dblfault_handler(void);
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
static void get_fpcontext(struct thread *td, mcontext_t *mcp,
@ -621,14 +616,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs = td->td_frame;
oonstack = sigonstack(regs->tf_esp);
#ifdef CPU_ENABLE_SSE
if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
xfpusave = __builtin_alloca(xfpusave_len);
} else {
#else
{
#endif
xfpusave_len = 0;
xfpusave = NULL;
}
@ -2169,9 +2160,7 @@ init386(int first)
struct gate_descriptor *gdp;
int gsel_tss, metadata_missing, x, pa;
struct pcpu *pc;
#ifdef CPU_ENABLE_SSE
struct xstate_hdr *xhdr;
#endif
int late_console;
thread0.td_kstack = proc0kstack;
@ -2424,9 +2413,7 @@ init386(int first)
i386_kdb_init();
msgbufinit(msgbufp, msgbufsize);
#ifdef DEV_NPX
npxinit(true);
#endif
/*
* Set up thread0 pcb after npxinit calculated pcb + fpu save
* area size. Zero out the extended state header in fpu save
@ -2434,13 +2421,11 @@ init386(int first)
*/
thread0.td_pcb = get_pcb_td(&thread0);
bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
1);
xhdr->xstate_bv = xsave_mask;
}
#endif
PCPU_SET(curpcb, thread0.td_pcb);
/* Move esp0 in the tss to its final place. */
/* Note: -16 is so we can grow the trapframe if we came from vm86 */
@ -2707,17 +2692,11 @@ fill_fpregs(struct thread *td, struct fpreg *fpregs)
KASSERT(td == curthread || TD_IS_SUSPENDED(td) ||
P_SHOULDSTOP(td->td_proc),
("not suspended thread %p", td));
#ifdef DEV_NPX
npxgetregs(td);
#else
bzero(fpregs, sizeof(*fpregs));
#endif
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
(struct save87 *)fpregs);
else
#endif /* CPU_ENABLE_SSE */
bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
sizeof(*fpregs));
return (0);
@ -2727,17 +2706,13 @@ int
set_fpregs(struct thread *td, struct fpreg *fpregs)
{
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
npx_set_fpregs_xmm((struct save87 *)fpregs,
&get_pcb_user_save_td(td)->sv_xmm);
else
#endif /* CPU_ENABLE_SSE */
bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
sizeof(*fpregs));
#ifdef DEV_NPX
npxuserinited(td);
#endif
return (0);
}
@ -2846,20 +2821,12 @@ static void
get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
size_t xfpusave_len)
{
#ifdef CPU_ENABLE_SSE
size_t max_len, len;
#endif
#ifndef DEV_NPX
mcp->mc_fpformat = _MC_FPFMT_NODEV;
mcp->mc_ownedfp = _MC_FPOWNED_NONE;
bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
#else
mcp->mc_ownedfp = npxgetregs(td);
bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
sizeof(mcp->mc_fpstate));
mcp->mc_fpformat = npxformat();
#ifdef CPU_ENABLE_SSE
if (!use_xsave || xfpusave_len == 0)
return;
max_len = cpu_max_ext_state_size - sizeof(union savefpu);
@ -2871,8 +2838,6 @@ get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
mcp->mc_flags |= _MC_HASFPXSTATE;
mcp->mc_xfpustate_len = len;
bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
#endif
#endif
}
static int
@ -2893,16 +2858,10 @@ set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
error = 0;
} else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
#ifdef DEV_NPX
fpstate = (union savefpu *)&mcp->mc_fpstate;
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
#endif
error = npxsetregs(td, fpstate, xfpustate, xfpustate_len);
#else
error = EINVAL;
#endif
} else
return (EINVAL);
return (error);
@ -2914,10 +2873,8 @@ fpstate_drop(struct thread *td)
KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu"));
critical_enter();
#ifdef DEV_NPX
if (PCPU_GET(fpcurthread) == td)
npxdrop();
#endif
/*
* XXX force a full drop of the npx. The above only drops it if we
* owned it. npxgetregs() has the same bug in the !cpu_fxsr case.

View File

@ -41,9 +41,6 @@ __FBSDID("$FreeBSD$");
#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */
#include <sys/param.h>

View File

@ -152,10 +152,6 @@ __FBSDID("$FreeBSD$");
#include <machine/xbox.h>
#endif
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif
@ -4217,11 +4213,9 @@ pagezero(void *page)
{
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686) {
#if defined(CPU_ENABLE_SSE)
if (cpu_feature & CPUID_SSE2)
sse2_pagezero(page);
else
#endif
i686_pagezero(page);
} else
#endif

View File

@ -39,11 +39,6 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/pcb.h>
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#ifdef CPU_ENABLE_SSE
static int
cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
{
@ -114,12 +109,10 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
return (error);
}
#endif
static int
cpu_ptrace_xmm(struct thread *td, int req, void *addr, int data)
{
#ifdef CPU_ENABLE_SSE
struct savexmm *fpstate;
int error;
@ -152,9 +145,6 @@ cpu_ptrace_xmm(struct thread *td, int req, void *addr, int data)
}
return (error);
#else
return (EINVAL);
#endif
}
int

View File

@ -29,8 +29,6 @@
* $FreeBSD$
*/
#include "opt_npx.h"
#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>

View File

@ -32,7 +32,6 @@
* $FreeBSD$
*/
#include "opt_npx.h"
#include "opt_sched.h"
#include <machine/asmacros.h>
@ -150,7 +149,6 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
#ifdef DEV_NPX
/* have we used fp, and need a save? */
cmpl %ecx,PCPU(FPCURTHREAD)
jne 1f
@ -158,7 +156,6 @@ ENTRY(cpu_switch)
call npxsave /* do it in a big C function */
popl %eax
1:
#endif
/* Save is done. Now fire up new thread. Leave old vmspace. */
movl 4(%esp),%edi

View File

@ -49,7 +49,6 @@ __FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
#include "opt_npx.h"
#include "opt_stack.h"
#include "opt_trap.h"
@ -335,13 +334,9 @@ trap(struct trapframe *frame)
break;
case T_ARITHTRAP: /* arithmetic trap */
#ifdef DEV_NPX
ucode = npxtrap_x87();
if (ucode == -1)
goto userout;
#else
ucode = 0;
#endif
i = SIGFPE;
break;
@ -475,13 +470,11 @@ trap(struct trapframe *frame)
break;
case T_DNA:
#ifdef DEV_NPX
KASSERT(PCB_USER_FPU(td->td_pcb),
("kernel FPU ctx has leaked"));
/* transparent fault (due to context switch "late") */
if (npxdna())
goto userout;
#endif
uprintf("pid %d killed due to lack of floating point\n",
p->p_pid);
i = SIGKILL;
@ -494,13 +487,9 @@ trap(struct trapframe *frame)
break;
case T_XMMFLT: /* SIMD floating-point exception */
#if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
ucode = npxtrap_sse();
if (ucode == -1)
goto userout;
#else
ucode = 0;
#endif
i = SIGFPE;
break;
#ifdef KDTRACE_HOOKS
@ -524,12 +513,10 @@ trap(struct trapframe *frame)
goto out;
case T_DNA:
#ifdef DEV_NPX
if (PCB_USER_FPU(td->td_pcb))
panic("Unregistered use of FPU in kernel");
if (npxdna())
goto out;
#endif
break;
case T_ARITHTRAP: /* arithmetic trap */

View File

@ -26,8 +26,6 @@
* $FreeBSD$
*/
#include "opt_npx.h"
#include <machine/asmacros.h> /* miscellaneous asm macros */
#include <machine/trap.h>
@ -63,7 +61,6 @@ ENTRY(vm86_bioscall)
pushl %edi
pushl %gs
#ifdef DEV_NPX
pushfl
cli
movl PCPU(CURTHREAD),%ecx
@ -77,7 +74,6 @@ ENTRY(vm86_bioscall)
popl %edx /* recover our pcb */
1:
popfl
#endif
movl SCR_VMFRAME(%edx),%ebx /* target frame location */
movl %ebx,%edi /* destination */

View File

@ -99,10 +99,6 @@ __FBSDID("$FreeBSD$");
#define NSFBUFS (512 + maxusers * 16)
#endif
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
_Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread),
"OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread.");
_Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb),
@ -152,18 +148,14 @@ void *
alloc_fpusave(int flags)
{
void *res;
#ifdef CPU_ENABLE_SSE
struct savefpu_ymm *sf;
#endif
res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
sf = (struct savefpu_ymm *)res;
bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
}
#endif
return (res);
}
/*
@ -203,12 +195,10 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
/* Ensure that td1's pcb is up to date. */
if (td1 == curthread)
td1->td_pcb->pcb_gs = rgs();
#ifdef DEV_NPX
critical_enter();
if (PCPU_GET(fpcurthread) == td1)
npxsave(td1->td_pcb->pcb_save);
critical_exit();
#endif
/* Point the pcb to the top of the stack */
pcb2 = get_pcb_td(td2);
@ -346,12 +336,10 @@ void
cpu_thread_exit(struct thread *td)
{
#ifdef DEV_NPX
critical_enter();
if (td == PCPU_GET(fpcurthread))
npxdrop();
critical_exit();
#endif
/* Disable any hardware breakpoints. */
if (td->td_pcb->pcb_flags & PCB_DBREGS) {
@ -392,21 +380,17 @@ void
cpu_thread_alloc(struct thread *td)
{
struct pcb *pcb;
#ifdef CPU_ENABLE_SSE
struct xstate_hdr *xhdr;
#endif
td->td_pcb = pcb = get_pcb_td(td);
td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
pcb->pcb_ext = NULL;
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
bzero(xhdr, sizeof(*xhdr));
xhdr->xstate_bv = xsave_mask;
}
#endif
}
void

View File

@ -170,33 +170,6 @@ struct __hack
* Returns 0 on failure, non-zero on success
*/
#ifdef CPU_DISABLE_CMPXCHG
static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
u_char res;
__asm __volatile(
" pushfl ; "
" cli ; "
" cmpl %3,%1 ; "
" jne 1f ; "
" movl %2,%1 ; "
"1: "
" sete %0 ; "
" popfl ; "
"# atomic_cmpset_int"
: "=q" (res), /* 0 */
"+m" (*dst) /* 1 */
: "r" (src), /* 2 */
"r" (expect) /* 3 */
: "memory");
return (res);
}
#else /* !CPU_DISABLE_CMPXCHG */
static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
@ -233,8 +206,6 @@ atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src)
return (res);
}
#endif /* CPU_DISABLE_CMPXCHG */
/*
* Atomically add the value of v to the integer pointed to by p and return
* the previous value of *p.

View File

@ -74,10 +74,6 @@ __FBSDID("$FreeBSD$");
#include <isa/isavar.h>
#endif
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
/*
* 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
*/
@ -93,7 +89,6 @@ __FBSDID("$FreeBSD$");
#define fp_divide_by_0() __asm __volatile( \
"fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
#ifdef CPU_ENABLE_SSE
#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
@ -130,7 +125,6 @@ xsaveopt(char *addr, uint64_t mask)
__asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) :
"memory");
}
#endif
#else /* !(__GNUCLIKE_ASM && !lint) */
void fldcw(u_short cw);
@ -141,7 +135,6 @@ void fnstcw(caddr_t addr);
void fnstsw(caddr_t addr);
void fp_divide_by_0(void);
void frstor(caddr_t addr);
#ifdef CPU_ENABLE_SSE
void fxsave(caddr_t addr);
void fxrstor(caddr_t addr);
void ldmxcsr(u_int csr);
@ -149,14 +142,12 @@ void stmxcsr(u_int *csr);
void xrstor(char *addr, uint64_t mask);
void xsave(char *addr, uint64_t mask);
void xsaveopt(char *addr, uint64_t mask);
#endif
#endif /* __GNUCLIKE_ASM && !lint */
#define start_emulating() load_cr0(rcr0() | CR0_TS)
#define stop_emulating() clts()
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
(cpu_fxsr ? \
(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
@ -171,16 +162,7 @@ void xsaveopt(char *addr, uint64_t mask);
else \
(savefpu)->sv_87.sv_env.en_cw = (value); \
} while (0)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(thread) \
(thread->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define GET_FPU_SW(thread) \
(thread->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define SET_FPU_CW(savefpu, value) \
(savefpu)->sv_87.sv_env.en_cw = (value)
#endif /* CPU_ENABLE_SSE */
#ifdef CPU_ENABLE_SSE
CTASSERT(sizeof(union savefpu) == 512);
CTASSERT(sizeof(struct xstate_hdr) == 64);
CTASSERT(sizeof(struct savefpu_ymm) == 832);
@ -200,7 +182,6 @@ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) &&
X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm));
static void fpu_clean_state(void);
#endif
static void fpusave(union savefpu *);
static void fpurstor(union savefpu *);
@ -210,21 +191,17 @@ int hw_float;
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
&hw_float, 0, "Floating point instructions executed in hardware");
#ifdef CPU_ENABLE_SSE
int use_xsave;
uint64_t xsave_mask;
#endif
static uma_zone_t fpu_save_area_zone;
static union savefpu *npx_initialstate;
#ifdef CPU_ENABLE_SSE
struct xsave_area_elm_descr {
u_int offset;
u_int size;
} *xsave_area_desc;
static int use_xsaveopt;
#endif
static volatile u_int npx_traps_while_probing;
@ -332,7 +309,6 @@ npx_probe(void)
return (hw_float);
}
#ifdef CPU_ENABLE_SSE
/*
* Enable XSAVE if supported and allowed by user.
* Calculate the xsave_mask.
@ -368,7 +344,7 @@ npxinit_bsp1(void)
if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0)
use_xsaveopt = 1;
}
#endif
/*
* Calculate the fpu save area size.
@ -376,7 +352,6 @@ npxinit_bsp1(void)
static void
npxinit_bsp2(void)
{
#ifdef CPU_ENABLE_SSE
u_int cp[4];
if (use_xsave) {
@ -389,7 +364,6 @@ npxinit_bsp2(void)
do_cpuid(1, cp);
cpu_feature2 = cp[2];
} else
#endif
cpu_max_ext_state_size = sizeof(union savefpu);
}
@ -401,25 +375,19 @@ npxinit(bool bsp)
{
static union savefpu dummy;
register_t saveintr;
#ifdef CPU_ENABLE_SSE
u_int mxcsr;
#endif
u_short control;
if (bsp) {
if (!npx_probe())
return;
#ifdef CPU_ENABLE_SSE
npxinit_bsp1();
#endif
}
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
load_cr4(rcr4() | CR4_XSAVE);
load_xcr(XCR0, xsave_mask);
}
#endif
/*
* XCR0 shall be set up before CPU can report the save area size.
@ -436,20 +404,16 @@ npxinit(bool bsp)
*/
saveintr = intr_disable();
stop_emulating();
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
fninit();
else
#endif
fnsave(&dummy);
control = __INITIAL_NPXCW__;
fldcw(control);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
mxcsr = __INITIAL_MXCSR__;
ldmxcsr(mxcsr);
}
#endif
start_emulating();
intr_restore(saveintr);
}
@ -463,9 +427,7 @@ static void
npxinitstate(void *arg __unused)
{
register_t saveintr;
#ifdef CPU_ENABLE_SSE
int cp[4], i, max_ext_n;
#endif
if (!hw_float)
return;
@ -476,7 +438,6 @@ npxinitstate(void *arg __unused)
stop_emulating();
fpusave(npx_initialstate);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr) {
if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask)
cpu_mxcsr_mask =
@ -498,11 +459,9 @@ npxinitstate(void *arg __unused)
bzero(npx_initialstate->sv_xmm.sv_xmm,
sizeof(npx_initialstate->sv_xmm.sv_xmm));
} else
#endif
bzero(npx_initialstate->sv_87.sv_ac,
sizeof(npx_initialstate->sv_87.sv_ac));
#ifdef CPU_ENABLE_SSE
/*
* Create a table describing the layout of the CPU Extended
* Save Area.
@ -527,7 +486,6 @@ npxinitstate(void *arg __unused)
xsave_area_desc[i].size = cp[0];
}
}
#endif
fpu_save_area_zone = uma_zcreate("FPU_save_area",
cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
@ -577,10 +535,8 @@ npxformat(void)
if (!hw_float)
return (_MC_FPFMT_NODEV);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
return (_MC_FPFMT_XMM);
#endif
return (_MC_FPFMT_387);
}
@ -801,7 +757,6 @@ npxtrap_x87(void)
return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
#ifdef CPU_ENABLE_SSE
int
npxtrap_sse(void)
{
@ -821,7 +776,6 @@ npxtrap_sse(void)
critical_exit();
return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
#endif
/*
* Implement device not available (DNA) exception
@ -860,10 +814,8 @@ npxdna(void)
*/
PCPU_SET(fpcurthread, curthread);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
fpu_clean_state();
#endif
if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
/*
@ -905,11 +857,9 @@ npxsave(addr)
{
stop_emulating();
#ifdef CPU_ENABLE_SSE
if (use_xsaveopt)
xsaveopt((char *)addr, xsave_mask);
else
#endif
fpusave(addr);
start_emulating();
PCPU_SET(fpcurthread, NULL);
@ -960,9 +910,7 @@ npxdrop(void)
* Discard pending exceptions in the !cpu_fxsr case so that unmasked
* ones don't cause a panic on the next frstor.
*/
#ifdef CPU_ENABLE_SSE
if (!cpu_fxsr)
#endif
fnclex();
td = PCPU_GET(fpcurthread);
@ -982,11 +930,9 @@ int
npxgetregs(struct thread *td)
{
struct pcb *pcb;
#ifdef CPU_ENABLE_SSE
uint64_t *xstate_bv, bit;
char *sa;
int max_ext_n, i;
#endif
int owned;
if (!hw_float)
@ -1003,9 +949,7 @@ npxgetregs(struct thread *td)
critical_enter();
if (td == PCPU_GET(fpcurthread)) {
fpusave(get_pcb_user_save_pcb(pcb));
#ifdef CPU_ENABLE_SSE
if (!cpu_fxsr)
#endif
/*
* fnsave initializes the FPU and destroys whatever
* context it contains. Make sure the FPU owner
@ -1017,7 +961,6 @@ npxgetregs(struct thread *td)
owned = _MC_FPOWNED_PCB;
}
critical_exit();
#ifdef CPU_ENABLE_SSE
if (use_xsave) {
/*
* Handle partially saved state.
@ -1040,7 +983,6 @@ npxgetregs(struct thread *td)
*xstate_bv |= bit;
}
}
#endif
return (owned);
}
@ -1055,7 +997,6 @@ npxuserinited(struct thread *td)
pcb->pcb_flags |= PCB_NPXUSERINITDONE;
}
#ifdef CPU_ENABLE_SSE
int
npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
{
@ -1093,16 +1034,13 @@ npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
return (0);
}
#endif
int
npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
size_t xfpustate_size)
{
struct pcb *pcb;
#ifdef CPU_ENABLE_SSE
int error;
#endif
if (!hw_float)
return (ENXIO);
@ -1110,14 +1048,12 @@ npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
pcb = td->td_pcb;
critical_enter();
if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
#ifdef CPU_ENABLE_SSE
error = npxsetxstate(td, xfpustate, xfpustate_size);
if (error != 0) {
critical_exit();
return (error);
}
if (!cpu_fxsr)
#endif
fnclex(); /* As in npxdrop(). */
bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
fpurstor(get_pcb_user_save_td(td));
@ -1125,11 +1061,9 @@ npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
} else {
critical_exit();
#ifdef CPU_ENABLE_SSE
error = npxsetxstate(td, xfpustate, xfpustate_size);
if (error != 0)
return (error);
#endif
bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
npxuserinited(td);
}
@ -1141,17 +1075,14 @@ fpusave(addr)
union savefpu *addr;
{
#ifdef CPU_ENABLE_SSE
if (use_xsave)
xsave((char *)addr, xsave_mask);
else if (cpu_fxsr)
fxsave(addr);
else
#endif
fnsave(addr);
}
#ifdef CPU_ENABLE_SSE
static void
npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
{
@ -1216,7 +1147,6 @@ npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
penv_xmm->en_tw |= 1 << i;
}
}
#endif /* CPU_ENABLE_SSE */
void
npx_get_fsave(void *addr)
@ -1227,11 +1157,9 @@ npx_get_fsave(void *addr)
td = curthread;
npxgetregs(td);
sv = get_pcb_user_save_td(td);
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
npx_fill_fpregs_xmm1(&sv->sv_xmm, addr);
else
#endif
bcopy(sv, addr, sizeof(struct env87) +
sizeof(struct fpacc87[8]));
}
@ -1243,18 +1171,15 @@ npx_set_fsave(void *addr)
int error;
bzero(&sv, sizeof(sv));
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
npx_set_fpregs_xmm(addr, &sv.sv_xmm);
else
#endif
bcopy(addr, &sv, sizeof(struct env87) +
sizeof(struct fpacc87[8]));
error = npxsetregs(curthread, &sv, NULL, 0);
return (error);
}
#ifdef CPU_ENABLE_SSE
/*
* On AuthenticAMD processors, the fxrstor instruction does not restore
* the x87's stored last instruction pointer, last data pointer, and last
@ -1285,19 +1210,16 @@ fpu_clean_state(void)
*/
__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
#endif /* CPU_ENABLE_SSE */
static void
fpurstor(union savefpu *addr)
{
#ifdef CPU_ENABLE_SSE
if (use_xsave)
xrstor((char *)addr, xsave_mask);
else if (cpu_fxsr)
fxrstor(addr);
else
#endif
frstor(addr);
}

View File

@ -47,10 +47,6 @@ __FBSDID("$FreeBSD$");
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_signal.h>
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
/*
* Linux ptrace requests numbers. Mostly identical to FreeBSD,
* except for MD ones and PT_ATTACH/PT_DETACH.
@ -216,7 +212,6 @@ struct linux_pt_fpxreg {
l_long padding[56];
};
#ifdef CPU_ENABLE_SSE
static int
linux_proc_read_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs)
{
@ -238,7 +233,6 @@ linux_proc_write_fpxregs(struct thread *td, struct linux_pt_fpxreg *fpxregs)
bcopy(fpxregs, &get_pcb_user_save_td(td)->sv_xmm, sizeof(*fpxregs));
return (0);
}
#endif
int
linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
@ -330,14 +324,11 @@ linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
}
break;
case PTRACE_SETFPXREGS:
#ifdef CPU_ENABLE_SSE
error = copyin((void *)uap->data, &r.fpxreg, sizeof(r.fpxreg));
if (error)
break;
#endif
/* FALL THROUGH */
case PTRACE_GETFPXREGS: {
#ifdef CPU_ENABLE_SSE
struct proc *p;
struct thread *td2;
@ -411,9 +402,6 @@ linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
fail:
PROC_UNLOCK(p);
#else
error = EIO;
#endif
break;
}
case PTRACE_PEEKUSR:

View File

@ -35,9 +35,6 @@ __FBSDID("$FreeBSD$");
#else
#include "opt_apic.h"
#endif
#ifdef __i386__
#include "opt_npx.h"
#endif
#include <sys/param.h>
#include <sys/bus.h>
@ -213,7 +210,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
if (savectx(pcb)) {
#ifdef __amd64__
fpususpend(susppcbs[0]->sp_fpususpend);
#elif defined(DEV_NPX)
#else
npxsuspend(susppcbs[0]->sp_fpususpend);
#endif
#ifdef SMP
@ -250,7 +247,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
} else {
#ifdef __amd64__
fpuresume(susppcbs[0]->sp_fpususpend);
#elif defined(DEV_NPX)
#else
npxresume(susppcbs[0]->sp_fpususpend);
#endif
}

View File

@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include "opt_mp_watchdog.h"
#include "opt_platform.h"
#ifdef __i386__
#include "opt_npx.h"
#include "opt_apic.h"
#include "opt_xbox.h"
#endif

View File

@ -67,10 +67,6 @@ __FBSDID("$FreeBSD$");
#include <x86/vmware.h>
#ifdef __i386__
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#define IDENTBLUE_CYRIX486 0
#define IDENTBLUE_IBMCPU 1
#define IDENTBLUE_CYRIXM2 2
@ -105,10 +101,8 @@ u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */
u_int cpu_procinfo2; /* Multicore info */
char cpu_vendor[20]; /* CPU Origin code */
u_int cpu_vendor_id; /* CPU vendor ID */
#if defined(__amd64__) || defined(CPU_ENABLE_SSE)
u_int cpu_fxsr; /* SSE enabled */
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
#endif
u_int cpu_clflush_line_size = 32;
u_int cpu_stdext_feature;
u_int cpu_stdext_feature2;