1993-06-12 14:58:17 +00:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1990 William Jolitz.
|
|
|
|
* Copyright (c) 1991 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
ALL:
Removed patch kit headers and rcsid strings, add $Id$.
isa.c:
Removed old #ifdef notyet isa_configure code, since it will never be
used, and I have done 90% of what it attempted to.
Add conflict checking code that searches back through the devtabs looking
for any device that has already been found that may conflict with what
we are about to probe. Checks are made for I/O address, memory address,
IRQ, and DRQ. This should stop the screwing up of any device that has
already been found by other device probes.
Print out messages when we are not going to probe a device due to
a conflict so the user knows WHY something was not found. For example:
aha0 not probed due to irq conflict with ahb0 at 11
Now print out a message when a device is not found so the user knows
that it was probed for, but could not be found. For example:
ed1 not found at 0x320
For devices that have I/O address < 0x100 say that they are on the
motherboard, not on isa! The 0x100 magic number is per ISA spec. It
may seem funny that pc0 and sc0 report as being on the motherboard, but
this is due to the fact that the I/O address used is that of the keyboard
controller which IS on the motherboard. We really need to split the
keyboard probe from the display probe. It is completely legal to build
a pc without one or the other, or even without both!
npx.c:
Return -1 from the probe routine if we are using the Emulator so
that the i/o addresses are not printed, this is the same trick used
for 486's.
Do not print the ``Errors reported via Exception 16'', and
``Errors reported via IRQ 13'' messages any more, since these just lead
to more user confusion than anything. It still prints the message
``Error reporting broken, using 387 emulator'' so that the person is
aware that their motherboard is ill.
1993-10-13 15:59:30 +00:00
|
|
|
* from: @(#)npx.c 7.2 (Berkeley) 5/12/91
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
|
|
|
|
2003-06-02 16:32:55 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2001-07-12 06:32:51 +00:00
|
|
|
#include "opt_cpu.h"
|
2002-01-30 12:41:12 +00:00
|
|
|
#include "opt_isa.h"
|
2001-10-21 05:18:30 +00:00
|
|
|
#include "opt_npx.h"
|
1996-01-04 19:51:50 +00:00
|
|
|
|
1994-08-13 03:50:34 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
1999-04-16 21:22:55 +00:00
|
|
|
#include <sys/bus.h>
|
1995-10-28 13:07:28 +00:00
|
|
|
#include <sys/kernel.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/lock.h>
|
1998-02-12 21:41:10 +00:00
|
|
|
#include <sys/malloc.h>
|
1999-04-16 21:22:55 +00:00
|
|
|
#include <sys/module.h>
|
2001-01-20 02:30:58 +00:00
|
|
|
#include <sys/mutex.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/proc.h>
|
2003-11-03 21:53:38 +00:00
|
|
|
#include <sys/smp.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/sysctl.h>
|
1999-04-16 21:22:55 +00:00
|
|
|
#include <machine/bus.h>
|
|
|
|
#include <sys/rman.h>
|
1997-01-29 13:46:28 +00:00
|
|
|
#ifdef NPX_DEBUG
|
1994-11-14 14:59:06 +00:00
|
|
|
#include <sys/syslog.h>
|
1997-01-29 13:46:28 +00:00
|
|
|
#endif
|
1994-11-14 14:59:06 +00:00
|
|
|
#include <sys/signalvar.h>
|
2014-11-02 21:34:24 +00:00
|
|
|
#include <vm/uma.h>
|
1994-10-23 21:28:03 +00:00
|
|
|
|
1997-04-22 06:55:47 +00:00
|
|
|
#include <machine/asmacros.h>
|
1997-10-28 11:43:57 +00:00
|
|
|
#include <machine/cputypes.h>
|
|
|
|
#include <machine/frame.h>
|
1996-06-25 20:31:01 +00:00
|
|
|
#include <machine/md_var.h>
|
1997-05-31 09:27:31 +00:00
|
|
|
#include <machine/pcb.h>
|
1997-10-28 11:43:57 +00:00
|
|
|
#include <machine/psl.h>
|
1999-04-16 21:22:55 +00:00
|
|
|
#include <machine/resource.h>
|
1994-08-13 03:50:34 +00:00
|
|
|
#include <machine/specialreg.h>
|
1997-10-28 11:43:57 +00:00
|
|
|
#include <machine/segments.h>
|
2002-09-16 19:25:59 +00:00
|
|
|
#include <machine/ucontext.h>
|
1994-10-23 21:28:03 +00:00
|
|
|
|
2003-11-03 21:53:38 +00:00
|
|
|
#include <machine/intr_machdep.h>
|
2008-08-15 21:43:38 +00:00
|
|
|
#ifdef XEN
|
Implement vector callback for PVHVM and unify event channel implementations
Re-structure Xen HVM support so that:
- Xen is detected and hypercalls can be performed very
early in system startup.
- Xen interrupt services are implemented using FreeBSD's native
interrupt delivery infrastructure.
- the Xen interrupt service implementation is shared between PV
and HVM guests.
- Xen interrupt handlers can optionally use a filter handler
in order to avoid the overhead of dispatch to an interrupt
thread.
- interrupt load can be distributed among all available CPUs.
- the overhead of accessing the emulated local and I/O apics
on HVM is removed for event channel port events.
- a similar optimization can eventually, and fairly easily,
be used to optimize MSI.
Early Xen detection, HVM refactoring, PVHVM interrupt infrastructure,
and misc Xen cleanups:
Sponsored by: Spectra Logic Corporation
Unification of PV & HVM interrupt infrastructure, bug fixes,
and misc Xen cleanups:
Submitted by: Roger Pau Monné
Sponsored by: Citrix Systems R&D
sys/x86/x86/local_apic.c:
sys/amd64/include/apicvar.h:
sys/i386/include/apicvar.h:
sys/amd64/amd64/apic_vector.S:
sys/i386/i386/apic_vector.s:
sys/amd64/amd64/machdep.c:
sys/i386/i386/machdep.c:
sys/i386/xen/exception.s:
sys/x86/include/segments.h:
Reserve IDT vector 0x93 for the Xen event channel upcall
interrupt handler. On Hypervisors that support the direct
vector callback feature, we can request that this vector be
called directly by an injected HVM interrupt event, instead
of a simulated PCI interrupt on the Xen platform PCI device.
This avoids all of the overhead of dealing with the emulated
I/O APIC and local APIC. It also means that the Hypervisor
can inject these events on any CPU, allowing upcalls for
different ports to be handled in parallel.
sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
Map Xen per-vcpu area during AP startup.
sys/amd64/include/intr_machdep.h:
sys/i386/include/intr_machdep.h:
Increase the FreeBSD IRQ vector table to include space
for event channel interrupt sources.
sys/amd64/include/pcpu.h:
sys/i386/include/pcpu.h:
Remove Xen HVM per-cpu variable data. These fields are now
allocated via the dynamic per-cpu scheme. See xen_intr.c
for details.
sys/amd64/include/xen/hypercall.h:
sys/dev/xen/blkback/blkback.c:
sys/i386/include/xen/xenvar.h:
sys/i386/xen/clock.c:
sys/i386/xen/xen_machdep.c:
sys/xen/gnttab.c:
Prefer FreeBSD primitives to Linux ones in Xen support code.
sys/amd64/include/xen/xen-os.h:
sys/i386/include/xen/xen-os.h:
sys/xen/xen-os.h:
sys/dev/xen/balloon/balloon.c:
sys/dev/xen/blkback/blkback.c:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/console/xencons_ring.c:
sys/dev/xen/control/control.c:
sys/dev/xen/netback/netback.c:
sys/dev/xen/netfront/netfront.c:
sys/dev/xen/xenpci/xenpci.c:
sys/i386/i386/machdep.c:
sys/i386/include/pmap.h:
sys/i386/include/xen/xenfunc.h:
sys/i386/isa/npx.c:
sys/i386/xen/clock.c:
sys/i386/xen/mp_machdep.c:
sys/i386/xen/mptable.c:
sys/i386/xen/xen_clock_util.c:
sys/i386/xen/xen_machdep.c:
sys/i386/xen/xen_rtc.c:
sys/xen/evtchn/evtchn_dev.c:
sys/xen/features.c:
sys/xen/gnttab.c:
sys/xen/gnttab.h:
sys/xen/hvm.h:
sys/xen/xenbus/xenbus.c:
sys/xen/xenbus/xenbus_if.m:
sys/xen/xenbus/xenbusb_front.c:
sys/xen/xenbus/xenbusvar.h:
sys/xen/xenstore/xenstore.c:
sys/xen/xenstore/xenstore_dev.c:
sys/xen/xenstore/xenstorevar.h:
Pull common Xen OS support functions/settings into xen/xen-os.h.
sys/amd64/include/xen/xen-os.h:
sys/i386/include/xen/xen-os.h:
sys/xen/xen-os.h:
Remove constants, macros, and functions unused in FreeBSD's Xen
support.
sys/xen/xen-os.h:
sys/i386/xen/xen_machdep.c:
sys/x86/xen/hvm.c:
Introduce new functions xen_domain(), xen_pv_domain(), and
xen_hvm_domain(). These are used in favor of #ifdefs so that
FreeBSD can dynamically detect and adapt to the presence of
a hypervisor. The goal is to have an HVM optimized GENERIC,
but more is necessary before this is possible.
sys/amd64/amd64/machdep.c:
sys/dev/xen/xenpci/xenpcivar.h:
sys/dev/xen/xenpci/xenpci.c:
sys/x86/xen/hvm.c:
sys/sys/kernel.h:
Refactor magic ioport, Hypercall table and Hypervisor shared
information page setup, and move it to a dedicated HVM support
module.
HVM mode initialization is now triggered during the
SI_SUB_HYPERVISOR phase of system startup. This currently
occurs just after the kernel VM is fully setup which is
just enough infrastructure to allow the hypercall table
and shared info page to be properly mapped.
sys/xen/hvm.h:
sys/x86/xen/hvm.c:
Add definitions and a method for configuring Hypervisor event
delivery via a direct vector callback.
sys/amd64/include/xen/xen-os.h:
sys/x86/xen/hvm.c:
sys/conf/files:
sys/conf/files.amd64:
sys/conf/files.i386:
Adjust kernel build to reflect the refactoring of early
Xen startup code and Xen interrupt services.
sys/dev/xen/blkback/blkback.c:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkfront/block.h:
sys/dev/xen/control/control.c:
sys/dev/xen/evtchn/evtchn_dev.c:
sys/dev/xen/netback/netback.c:
sys/dev/xen/netfront/netfront.c:
sys/xen/xenstore/xenstore.c:
sys/xen/evtchn/evtchn_dev.c:
sys/dev/xen/console/console.c:
sys/dev/xen/console/xencons_ring.c
Adjust drivers to use new xen_intr_*() API.
sys/dev/xen/blkback/blkback.c:
Since blkback defers all event handling to a taskqueue,
convert this task queue to a "fast" taskqueue, and schedule
it via an interrupt filter. This avoids an unnecessary
ithread context switch.
sys/xen/xenstore/xenstore.c:
The xenstore driver is MPSAFE. Indicate as much when
registering its interrupt handler.
sys/xen/xenbus/xenbus.c:
sys/xen/xenbus/xenbusvar.h:
Remove unused event channel APIs.
sys/xen/evtchn.h:
Remove all kernel Xen interrupt service API definitions
from this file. It is now only used for structure and
ioctl definitions related to the event channel userland
device driver.
Update the definitions in this file to match those from
NetBSD. Implementing this interface will be necessary for
Dom0 support.
sys/xen/evtchn/evtchnvar.h:
Add a header file for implementation-internal APIs related
to managing event channels event delivery. This is used
to allow, for example, the event channel userland device
driver to access low-level routines that typical kernel
consumers of event channel services should never access.
sys/xen/interface/event_channel.h:
sys/xen/xen_intr.h:
Standardize on the evtchn_port_t type for referring to
an event channel port id. In order to prevent low-level
event channel APIs from leaking to kernel consumers who
should not have access to this data, the type is defined
twice: Once in the Xen provided event_channel.h, and again
in xen/xen_intr.h. The double declaration is protected by
__XEN_EVTCHN_PORT_DEFINED__ to ensure it is never declared
twice within a given compilation unit.
sys/xen/xen_intr.h:
sys/xen/evtchn/evtchn.c:
sys/x86/xen/xen_intr.c:
sys/dev/xen/xenpci/evtchn.c:
sys/dev/xen/xenpci/xenpcivar.h:
New implementation of Xen interrupt services. This is
similar in many respects to the i386 PV implementation with
the exception that events bound to event channel ports
(i.e. not IPI, virtual IRQ, or physical IRQ) are further
optimized to avoid mask/unmask operations that aren't
necessary for these edge triggered events.
Stubs exist for supporting physical IRQ binding, but will
need additional work before this implementation can be
fully shared between PV and HVM.
sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
sys/i386/xen/mp_machdep.c
sys/x86/xen/hvm.c:
Add support for placing vcpu_info into an arbitrary memory
page instead of using HYPERVISOR_shared_info->vcpu_info.
This allows the creation of domains with more than 32 vcpus.
sys/i386/i386/machdep.c:
sys/i386/xen/clock.c:
sys/i386/xen/xen_machdep.c:
sys/i386/xen/exception.s:
Add support for new event channel implementation.
2013-08-29 19:52:18 +00:00
|
|
|
#include <xen/xen-os.h>
|
2008-12-29 06:31:03 +00:00
|
|
|
#include <xen/hypervisor.h>
|
2008-08-15 21:43:38 +00:00
|
|
|
#endif
|
|
|
|
|
2002-01-30 12:41:12 +00:00
|
|
|
#ifdef DEV_ISA
|
2000-05-04 23:57:32 +00:00
|
|
|
#include <isa/isavar.h>
|
2002-01-30 12:41:12 +00:00
|
|
|
#endif
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2005-07-02 20:06:44 +00:00
|
|
|
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
|
2002-09-07 07:02:12 +00:00
|
|
|
#define CPU_ENABLE_SSE
|
|
|
|
#endif
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
|
|
|
* 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
|
|
|
|
*/
|
|
|
|
|
2005-03-02 21:33:29 +00:00
|
|
|
#if defined(__GNUCLIKE_ASM) && !defined(lint)
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2010-07-26 23:20:55 +00:00
|
|
|
#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
|
2010-07-26 22:16:36 +00:00
|
|
|
#define fnclex() __asm __volatile("fnclex")
|
|
|
|
#define fninit() __asm __volatile("fninit")
|
1998-04-15 18:58:09 +00:00
|
|
|
#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
|
|
|
|
#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
|
2010-07-26 21:24:52 +00:00
|
|
|
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
|
2010-07-26 22:16:36 +00:00
|
|
|
#define fp_divide_by_0() __asm __volatile( \
|
|
|
|
"fldz; fld1; fdiv %st,%st(1); fnop")
|
|
|
|
#define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
|
2001-08-23 01:03:56 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
2010-07-26 22:16:36 +00:00
|
|
|
#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
|
2001-07-12 06:32:51 +00:00
|
|
|
#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
|
2014-11-02 21:34:24 +00:00
|
|
|
#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr))
|
2012-07-21 21:39:23 +00:00
|
|
|
/* stmxcsr stores MXCSR into memory, so *(addr) is an output operand. */
#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : "=m" (*(addr)))
|
2014-11-02 22:58:30 +00:00
|
|
|
|
|
|
|
/*
 * Execute xrstor on the save area at addr.  The 64-bit component mask
 * is split into its low and high halves and handed to the instruction
 * in %eax and %edx.
 */
static __inline void
xrstor(char *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	/*
	 * The "m" operand only names the first byte of the area, so the
	 * "memory" clobber is needed to keep the compiler from reordering
	 * or discarding earlier stores into the rest of the area.
	 */
	__asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi) :
	    "memory");
}
|
|
|
|
|
|
|
|
/*
 * Execute xsave on the save area at addr, with the 64-bit component
 * mask supplied to the instruction as %edx:%eax.
 */
static __inline void
xsave(char *addr, uint64_t mask)
{
	uint32_t eax_bits = mask;
	uint32_t edx_bits = mask >> 32;

	__asm __volatile("xsave %0"
	    : "=m" (*addr)
	    : "a" (eax_bits), "d" (edx_bits)
	    : "memory");
}
|
|
|
|
|
|
|
|
/*
 * Execute xsaveopt on the save area at addr, with the 64-bit component
 * mask supplied to the instruction as %edx:%eax.
 */
static __inline void
xsaveopt(char *addr, uint64_t mask)
{
	uint32_t eax_bits = mask;
	uint32_t edx_bits = mask >> 32;

	__asm __volatile("xsaveopt %0"
	    : "=m" (*addr)
	    : "a" (eax_bits), "d" (edx_bits)
	    : "memory");
}
|
2001-08-23 01:03:56 +00:00
|
|
|
#endif
|
2005-03-02 21:33:29 +00:00
|
|
|
#else /* !(__GNUCLIKE_ASM && !lint) */
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2010-07-26 23:20:55 +00:00
|
|
|
void fldcw(u_short cw);
|
2002-03-23 16:01:49 +00:00
|
|
|
void fnclex(void);
|
|
|
|
void fninit(void);
|
|
|
|
void fnsave(caddr_t addr);
|
|
|
|
void fnstcw(caddr_t addr);
|
|
|
|
void fnstsw(caddr_t addr);
|
|
|
|
void fp_divide_by_0(void);
|
|
|
|
void frstor(caddr_t addr);
|
2001-08-23 01:03:56 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
2002-03-23 16:01:49 +00:00
|
|
|
void fxsave(caddr_t addr);
|
|
|
|
void fxrstor(caddr_t addr);
|
2014-11-02 21:34:24 +00:00
|
|
|
void ldmxcsr(u_int csr);
|
2012-07-30 19:26:02 +00:00
|
|
|
void stmxcsr(u_int *csr);
|
2014-11-02 22:58:30 +00:00
|
|
|
void xrstor(char *addr, uint64_t mask);
|
|
|
|
void xsave(char *addr, uint64_t mask);
|
|
|
|
void xsaveopt(char *addr, uint64_t mask);
|
2001-08-23 01:03:56 +00:00
|
|
|
#endif
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2005-03-02 21:33:29 +00:00
|
|
|
#endif /* __GNUCLIKE_ASM && !lint */
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2012-07-09 20:55:39 +00:00
|
|
|
#ifdef XEN
|
|
|
|
#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
|
|
|
|
#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
|
|
|
|
#else
|
|
|
|
#define start_emulating() load_cr0(rcr0() | CR0_TS)
|
|
|
|
#define stop_emulating() clts()
|
|
|
|
#endif
|
|
|
|
|
2001-07-12 06:32:51 +00:00
|
|
|
/*
 * Accessors for the FPU control word (CW) and status word (SW) kept in a
 * thread's saved FPU state.  With CPU_ENABLE_SSE the save area is read
 * through sv_xmm when cpu_fxsr is set and through sv_87 otherwise;
 * without it only sv_87 is used.  Every macro argument is parenthesized
 * so that arbitrary pointer expressions may be passed.
 */
#ifdef CPU_ENABLE_SSE
#define GET_FPU_CW(thread) \
	(cpu_fxsr ? \
		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define GET_FPU_SW(thread) \
	(cpu_fxsr ? \
		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define SET_FPU_CW(savefpu, value) do { \
	if (cpu_fxsr) \
		(savefpu)->sv_xmm.sv_env.en_cw = (value); \
	else \
		(savefpu)->sv_87.sv_env.en_cw = (value); \
} while (0)
#else /* CPU_ENABLE_SSE */
#define GET_FPU_CW(thread) \
	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define GET_FPU_SW(thread) \
	((thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define SET_FPU_CW(savefpu, value) \
	((savefpu)->sv_87.sv_env.en_cw = (value))
#endif /* CPU_ENABLE_SSE */
|
|
|
|
|
2006-04-19 07:00:19 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
2014-11-02 22:58:30 +00:00
|
|
|
CTASSERT(sizeof(union savefpu) == 512);
|
|
|
|
CTASSERT(sizeof(struct xstate_hdr) == 64);
|
|
|
|
CTASSERT(sizeof(struct savefpu_ymm) == 832);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This requirement is to make it easier for asm code to calculate
|
|
|
|
* offset of the fpu save area from the pcb address. FPU save area
|
|
|
|
* must be 64-byte aligned.
|
|
|
|
*/
|
|
|
|
CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);
|
|
|
|
|
2006-04-19 07:00:19 +00:00
|
|
|
static void fpu_clean_state(void);
|
|
|
|
#endif
|
|
|
|
|
2002-11-16 06:35:53 +00:00
|
|
|
static void fpusave(union savefpu *);
|
|
|
|
static void fpurstor(union savefpu *);
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
int hw_float;
|
1995-10-28 13:07:28 +00:00
|
|
|
|
2009-02-23 15:39:24 +00:00
|
|
|
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
|
|
|
|
&hw_float, 0, "Floating point instructions executed in hardware");
|
1995-10-28 13:07:28 +00:00
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
int use_xsave;
|
|
|
|
uint64_t xsave_mask;
|
|
|
|
static uma_zone_t fpu_save_area_zone;
|
|
|
|
static union savefpu *npx_initialstate;
|
|
|
|
|
|
|
|
struct xsave_area_elm_descr {
|
|
|
|
u_int offset;
|
|
|
|
u_int size;
|
|
|
|
} *xsave_area_desc;
|
|
|
|
|
|
|
|
static int use_xsaveopt;
|
|
|
|
|
1998-12-07 21:58:50 +00:00
|
|
|
static volatile u_int npx_traps_while_probing;
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2001-10-16 14:12:35 +00:00
|
|
|
alias_for_inthand_t probetrap;
|
1998-04-19 15:39:26 +00:00
|
|
|
__asm(" \n\
|
|
|
|
.text \n\
|
|
|
|
.p2align 2,0x90 \n\
|
1999-05-06 09:44:57 +00:00
|
|
|
.type " __XSTRING(CNAME(probetrap)) ",@function \n\
|
1998-04-19 15:39:26 +00:00
|
|
|
" __XSTRING(CNAME(probetrap)) ": \n\
|
|
|
|
ss \n\
|
|
|
|
incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
|
|
|
|
fnclex \n\
|
|
|
|
iret \n\
|
1993-06-12 14:58:17 +00:00
|
|
|
");
|
1997-07-21 07:57:50 +00:00
|
|
|
|
1999-08-22 19:52:51 +00:00
|
|
|
/*
|
2014-11-02 22:58:30 +00:00
|
|
|
* Determine if an FPU is present and how to use it.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
|
|
|
static int
|
2014-11-02 22:58:30 +00:00
|
|
|
npx_probe(void)
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
2001-10-16 14:12:35 +00:00
|
|
|
struct gate_descriptor save_idt_npxtrap;
|
2010-06-23 11:12:58 +00:00
|
|
|
u_short control, status;
|
2001-10-16 14:12:35 +00:00
|
|
|
|
2006-04-06 17:17:45 +00:00
|
|
|
/*
|
|
|
|
* Modern CPUs all have an FPU that uses the INT16 interface
|
|
|
|
* and provide a simple way to verify that, so handle the
|
|
|
|
* common case right away.
|
|
|
|
*/
|
|
|
|
if (cpu_feature & CPUID_FPU) {
|
2010-06-23 11:12:58 +00:00
|
|
|
hw_float = 1;
|
2014-11-02 22:58:30 +00:00
|
|
|
return (1);
|
2006-04-06 17:17:45 +00:00
|
|
|
}
|
|
|
|
|
2003-09-10 01:07:04 +00:00
|
|
|
save_idt_npxtrap = idt[IDT_MF];
|
|
|
|
setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
|
|
|
|
GSEL(GCODE_SEL, SEL_KPL));
|
2005-03-16 20:46:16 +00:00
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
2006-04-06 17:17:45 +00:00
|
|
|
* Don't trap while we're probing.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
|
|
|
stop_emulating();
|
2006-04-06 17:17:45 +00:00
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
|
|
|
* Finish resetting the coprocessor, if any. If there is an error
|
2001-10-16 14:12:35 +00:00
|
|
|
* pending, then we may get a bogus IRQ13, but npx_intr() will handle
|
1993-06-12 14:58:17 +00:00
|
|
|
* it OK. Bogus halts have never been observed, but we enabled
|
|
|
|
* IRQ13 and cleared the BUSY# latch early to handle them anyway.
|
|
|
|
*/
|
|
|
|
fninit();
|
1997-07-21 07:57:50 +00:00
|
|
|
|
1995-02-23 17:32:38 +00:00
|
|
|
/*
|
|
|
|
* Don't use fwait here because it might hang.
|
|
|
|
* Don't use fnop here because it usually hangs if there is no FPU.
|
|
|
|
*/
|
1995-01-03 04:00:06 +00:00
|
|
|
DELAY(1000); /* wait for any IRQ13 */
|
1993-06-12 14:58:17 +00:00
|
|
|
#ifdef DIAGNOSTIC
|
|
|
|
if (npx_traps_while_probing != 0)
|
|
|
|
printf("fninit caused %u bogus npx trap(s)\n",
|
|
|
|
npx_traps_while_probing);
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* Check for a status of mostly zero.
|
|
|
|
*/
|
|
|
|
status = 0x5a5a;
|
|
|
|
fnstsw(&status);
|
|
|
|
if ((status & 0xb8ff) == 0) {
|
|
|
|
/*
|
|
|
|
* Good, now check for a proper control word.
|
|
|
|
*/
|
1995-05-30 08:16:23 +00:00
|
|
|
control = 0x5a5a;
|
1993-06-12 14:58:17 +00:00
|
|
|
fnstcw(&control);
|
|
|
|
if ((control & 0x1f3f) == 0x033f) {
|
|
|
|
/*
|
|
|
|
* We have an npx, now divide by 0 to see if exception
|
|
|
|
* 16 works.
|
|
|
|
*/
|
|
|
|
control &= ~(1 << 2); /* enable divide by 0 trap */
|
2010-07-26 23:20:55 +00:00
|
|
|
fldcw(control);
|
2001-10-21 05:18:30 +00:00
|
|
|
#ifdef FPU_ERROR_BROKEN
|
|
|
|
/*
|
|
|
|
* FPU error signal doesn't work on some CPU
|
|
|
|
* accelerator board.
|
|
|
|
*/
|
2010-06-23 11:12:58 +00:00
|
|
|
hw_float = 1;
|
2014-11-02 22:58:30 +00:00
|
|
|
return (1);
|
2001-10-21 05:18:30 +00:00
|
|
|
#endif
|
2010-06-23 11:12:58 +00:00
|
|
|
npx_traps_while_probing = 0;
|
1993-06-12 14:58:17 +00:00
|
|
|
fp_divide_by_0();
|
|
|
|
if (npx_traps_while_probing != 0) {
|
|
|
|
/*
|
|
|
|
* Good, exception 16 works.
|
|
|
|
*/
|
2010-06-23 11:12:58 +00:00
|
|
|
hw_float = 1;
|
|
|
|
goto cleanup;
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
2014-11-02 22:58:30 +00:00
|
|
|
printf(
|
2010-06-23 11:12:58 +00:00
|
|
|
"FPU does not use exception 16 for error reporting\n");
|
|
|
|
goto cleanup;
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
}
|
2009-03-05 18:32:43 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
/*
|
|
|
|
* Probe failed. Floating point simply won't work.
|
|
|
|
* Notify user and disable FPU/MMX/SSE instruction execution.
|
|
|
|
*/
|
2014-11-02 22:58:30 +00:00
|
|
|
printf("WARNING: no FPU!\n");
|
2010-06-23 11:12:58 +00:00
|
|
|
__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
|
|
|
|
"n" (CR0_EM | CR0_MP) : "ax");
|
2009-03-05 18:32:43 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
cleanup:
|
2003-09-10 01:07:04 +00:00
|
|
|
idt[IDT_MF] = save_idt_npxtrap;
|
2014-11-02 22:58:30 +00:00
|
|
|
return (hw_float);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-11-02 22:58:30 +00:00
|
|
|
* Enable XSAVE if supported and allowed by user.
|
|
|
|
* Calculate the xsave_mask.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2014-11-02 22:58:30 +00:00
|
|
|
static void
|
|
|
|
npxinit_bsp1(void)
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
2014-11-02 22:58:30 +00:00
|
|
|
u_int cp[4];
|
|
|
|
uint64_t xsave_mask_user;
|
1999-07-25 13:16:09 +00:00
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) {
|
|
|
|
use_xsave = 1;
|
|
|
|
TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
|
|
|
|
}
|
|
|
|
if (!use_xsave)
|
|
|
|
return;
|
1996-11-11 20:39:03 +00:00
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
cpuid_count(0xd, 0x0, cp);
|
|
|
|
xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
|
|
|
|
if ((cp[0] & xsave_mask) != xsave_mask)
|
|
|
|
panic("CPU0 does not support X87 or SSE: %x", cp[0]);
|
|
|
|
xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
|
|
|
|
xsave_mask_user = xsave_mask;
|
|
|
|
TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user);
|
|
|
|
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
|
|
|
|
xsave_mask &= xsave_mask_user;
|
|
|
|
if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512)
|
|
|
|
xsave_mask &= ~XFEATURE_AVX512;
|
|
|
|
if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
|
|
|
|
xsave_mask &= ~XFEATURE_MPX;
|
|
|
|
|
|
|
|
cpuid_count(0xd, 0x1, cp);
|
|
|
|
if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0)
|
|
|
|
use_xsaveopt = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate the fpu save area size.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
npxinit_bsp2(void)
|
|
|
|
{
|
|
|
|
u_int cp[4];
|
|
|
|
|
|
|
|
if (use_xsave) {
|
|
|
|
cpuid_count(0xd, 0x0, cp);
|
|
|
|
cpu_max_ext_state_size = cp[1];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Reload the cpu_feature2, since we enabled OSXSAVE.
|
|
|
|
*/
|
|
|
|
do_cpuid(1, cp);
|
|
|
|
cpu_feature2 = cp[2];
|
|
|
|
} else
|
|
|
|
cpu_max_ext_state_size = sizeof(union savefpu);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize floating point unit.
|
|
|
|
*/
|
|
|
|
void
|
2014-11-02 22:58:30 +00:00
|
|
|
npxinit(bool bsp)
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
2001-07-17 13:06:47 +00:00
|
|
|
static union savefpu dummy;
|
2010-10-25 15:31:13 +00:00
|
|
|
register_t saveintr;
|
2014-11-02 22:58:30 +00:00
|
|
|
u_int mxcsr;
|
2009-03-05 18:32:43 +00:00
|
|
|
u_short control;
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
if (bsp) {
|
|
|
|
if (!npx_probe())
|
|
|
|
return;
|
|
|
|
npxinit_bsp1();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (use_xsave) {
|
|
|
|
load_cr4(rcr4() | CR4_XSAVE);
|
|
|
|
load_xcr(XCR0, xsave_mask);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XCR0 shall be set up before CPU can report the save area size.
|
|
|
|
*/
|
|
|
|
if (bsp)
|
|
|
|
npxinit_bsp2();
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
|
|
|
* fninit has the same h/w bugs as fnsave. Use the detoxified
|
2014-11-02 22:58:30 +00:00
|
|
|
* fnsave to throw away any junk in the fpu. fpusave() initializes
|
|
|
|
* the fpu.
|
2010-06-23 11:21:19 +00:00
|
|
|
*
|
|
|
|
* It is too early for critical_enter() to work on AP.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2010-10-25 15:31:13 +00:00
|
|
|
saveintr = intr_disable();
|
1993-06-12 14:58:17 +00:00
|
|
|
stop_emulating();
|
2001-10-15 20:18:06 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (cpu_fxsr)
|
|
|
|
fninit();
|
2014-11-02 22:58:30 +00:00
|
|
|
else
|
2001-10-15 20:18:06 +00:00
|
|
|
#endif
|
2014-11-02 22:58:30 +00:00
|
|
|
fnsave(&dummy);
|
2009-03-05 18:32:43 +00:00
|
|
|
control = __INITIAL_NPXCW__;
|
2010-07-26 23:20:55 +00:00
|
|
|
fldcw(control);
|
2014-11-02 21:34:24 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (cpu_fxsr) {
|
|
|
|
mxcsr = __INITIAL_MXCSR__;
|
|
|
|
ldmxcsr(mxcsr);
|
|
|
|
}
|
|
|
|
#endif
|
1993-06-12 14:58:17 +00:00
|
|
|
start_emulating();
|
2010-10-25 15:31:13 +00:00
|
|
|
intr_restore(saveintr);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
/*
|
|
|
|
* On the boot CPU we generate a clean state that is used to
|
|
|
|
* initialize the floating point unit when it is first used by a
|
|
|
|
* process.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
npxinitstate(void *arg __unused)
|
|
|
|
{
|
|
|
|
register_t saveintr;
|
|
|
|
int cp[4], i, max_ext_n;
|
|
|
|
|
|
|
|
if (!hw_float)
|
|
|
|
return;
|
|
|
|
|
|
|
|
npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
|
|
|
|
M_WAITOK | M_ZERO);
|
|
|
|
saveintr = intr_disable();
|
|
|
|
stop_emulating();
|
|
|
|
|
|
|
|
fpusave(npx_initialstate);
|
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (cpu_fxsr) {
|
|
|
|
if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask)
|
|
|
|
cpu_mxcsr_mask =
|
|
|
|
npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask;
|
|
|
|
else
|
|
|
|
cpu_mxcsr_mask = 0xFFBF;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The fninit instruction does not modify XMM
|
|
|
|
* registers. The fpusave call dumped the garbage
|
|
|
|
* contained in the registers after reset to the
|
|
|
|
* initial state saved. Clear XMM registers file
|
|
|
|
* image to make the startup program state and signal
|
|
|
|
* handler XMM register content predictable.
|
|
|
|
*/
|
|
|
|
bzero(npx_initialstate->sv_xmm.sv_fp,
|
|
|
|
sizeof(npx_initialstate->sv_xmm.sv_fp));
|
|
|
|
bzero(npx_initialstate->sv_xmm.sv_xmm,
|
|
|
|
sizeof(npx_initialstate->sv_xmm.sv_xmm));
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
bzero(npx_initialstate->sv_87.sv_ac,
|
|
|
|
sizeof(npx_initialstate->sv_87.sv_ac));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a table describing the layout of the CPU Extended
|
|
|
|
* Save Area.
|
|
|
|
*/
|
|
|
|
if (use_xsave) {
|
|
|
|
if (xsave_mask >> 32 != 0)
|
|
|
|
max_ext_n = fls(xsave_mask >> 32) + 32;
|
|
|
|
else
|
|
|
|
max_ext_n = fls(xsave_mask);
|
|
|
|
xsave_area_desc = malloc(max_ext_n * sizeof(struct
|
|
|
|
xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
|
|
|
|
/* x87 state */
|
|
|
|
xsave_area_desc[0].offset = 0;
|
|
|
|
xsave_area_desc[0].size = 160;
|
|
|
|
/* XMM */
|
|
|
|
xsave_area_desc[1].offset = 160;
|
|
|
|
xsave_area_desc[1].size = 288 - 160;
|
|
|
|
|
|
|
|
for (i = 2; i < max_ext_n; i++) {
|
|
|
|
cpuid_count(0xd, i, cp);
|
|
|
|
xsave_area_desc[i].offset = cp[1];
|
|
|
|
xsave_area_desc[i].size = cp[0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fpu_save_area_zone = uma_zcreate("FPU_save_area",
|
|
|
|
cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
|
|
|
|
XSAVE_AREA_ALIGN - 1, 0);
|
|
|
|
|
|
|
|
start_emulating();
|
|
|
|
intr_restore(saveintr);
|
|
|
|
}
|
|
|
|
SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL);
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
|
|
|
* Free coprocessor (if we have it).
|
|
|
|
*/
|
|
|
|
void
|
2001-09-12 08:38:13 +00:00
|
|
|
npxexit(td)
|
|
|
|
struct thread *td;
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
|
|
|
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
2014-11-02 22:58:30 +00:00
|
|
|
if (curthread == PCPU_GET(fpcurthread)) {
|
|
|
|
stop_emulating();
|
|
|
|
fpusave(curpcb->pcb_save);
|
|
|
|
start_emulating();
|
|
|
|
PCPU_SET(fpcurthread, NULL);
|
|
|
|
}
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
1997-01-29 13:46:28 +00:00
|
|
|
#ifdef NPX_DEBUG
|
2010-06-23 11:12:58 +00:00
|
|
|
if (hw_float) {
|
1994-11-14 14:59:06 +00:00
|
|
|
u_int masked_exceptions;
|
|
|
|
|
2002-09-16 19:25:59 +00:00
|
|
|
masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
|
1994-11-14 14:59:06 +00:00
|
|
|
/*
|
1997-01-29 13:46:28 +00:00
|
|
|
* Log exceptions that would have trapped with the old
|
|
|
|
* control word (overflow, divide by 0, and invalid operand).
|
1994-11-14 14:59:06 +00:00
|
|
|
*/
|
|
|
|
if (masked_exceptions & 0x0d)
|
|
|
|
log(LOG_ERR,
|
2001-09-18 21:05:04 +00:00
|
|
|
"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
|
|
|
|
td->td_proc->p_pid, td->td_proc->p_comm,
|
|
|
|
masked_exceptions);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
1997-01-29 13:46:28 +00:00
|
|
|
#endif
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
2002-09-16 19:25:59 +00:00
|
|
|
int
|
|
|
|
npxformat()
|
|
|
|
{
|
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
if (!hw_float)
|
2002-09-16 19:25:59 +00:00
|
|
|
return (_MC_FPFMT_NODEV);
|
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (cpu_fxsr)
|
|
|
|
return (_MC_FPFMT_XMM);
|
|
|
|
#endif
|
|
|
|
return (_MC_FPFMT_387);
|
|
|
|
}
|
|
|
|
|
1999-07-25 13:16:09 +00:00
|
|
|
/*
|
|
|
|
* The following mechanism is used to ensure that the FPE_... value
|
|
|
|
* that is passed as a trapcode to the signal handler of the user
|
|
|
|
* process does not have more than one bit set.
|
|
|
|
*
|
|
|
|
* Multiple bits may be set if the user process modifies the control
|
1999-07-26 05:47:31 +00:00
|
|
|
* word while a status word bit is already set. While this is a sign
|
1999-07-25 13:16:09 +00:00
|
|
|
* of bad coding, we have no choice but to narrow them down to one
|
|
|
|
* bit, since we must not send a trapcode that is not exactly one of
|
|
|
|
* the FPE_ macros.
|
|
|
|
*
|
1999-07-26 05:47:31 +00:00
|
|
|
* The mechanism has a static table with 128 entries. Each combination
|
1999-07-25 13:16:09 +00:00
|
|
|
* of the 7 FPU status word exception bits directly translates to a
|
|
|
|
* position in this table, where a single FPE_... value is stored.
|
|
|
|
* This FPE_... value stored there is considered the "most important"
|
1999-07-26 05:47:31 +00:00
|
|
|
* of the exception bits and will be sent as the signal code. The
|
1999-07-25 13:16:09 +00:00
|
|
|
* precedence of the bits is based upon Intel Document "Numerical
|
|
|
|
* Applications", Chapter "Special Computational Situations".
|
|
|
|
*
|
|
|
|
* The macro to choose one of these values does these steps: 1) Throw
|
1999-07-26 05:47:31 +00:00
|
|
|
* away status word bits that cannot be masked. 2) Throw away the bits
|
1999-07-25 13:16:09 +00:00
|
|
|
* currently masked in the control word, assuming the user isn't
|
1999-07-26 05:47:31 +00:00
|
|
|
* interested in them anymore. 3) Reinsert status word bit 7 (stack
|
1999-07-25 13:16:09 +00:00
|
|
|
* fault) if it is set, which cannot be masked but must be preserved.
|
|
|
|
* 4) Use the remaining bits to point into the trapcode table.
|
|
|
|
*
|
|
|
|
* The 6 maskable bits in order of their preference, as stated in the
|
|
|
|
* above referenced Intel manual:
|
|
|
|
* 1 Invalid operation (FP_X_INV)
|
|
|
|
* 1a Stack underflow
|
|
|
|
* 1b Stack overflow
|
|
|
|
* 1c Operand of unsupported format
|
|
|
|
* 1d SNaN operand.
|
|
|
|
* 2 QNaN operand (not an exception, irrelevant here)
|
|
|
|
* 3 Any other invalid-operation not mentioned above or zero divide
|
|
|
|
* (FP_X_INV, FP_X_DZ)
|
|
|
|
* 4 Denormal operand (FP_X_DNML)
|
|
|
|
* 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
|
1999-07-26 05:47:31 +00:00
|
|
|
* 6 Inexact result (FP_X_IMP)
|
|
|
|
*/
|
1999-07-25 13:16:09 +00:00
|
|
|
static char fpetable[128] = {
|
|
|
|
0,
|
1999-07-26 05:47:31 +00:00
|
|
|
FPE_FLTINV, /* 1 - INV */
|
|
|
|
FPE_FLTUND, /* 2 - DNML */
|
|
|
|
FPE_FLTINV, /* 3 - INV | DNML */
|
|
|
|
FPE_FLTDIV, /* 4 - DZ */
|
|
|
|
FPE_FLTINV, /* 5 - INV | DZ */
|
|
|
|
FPE_FLTDIV, /* 6 - DNML | DZ */
|
|
|
|
FPE_FLTINV, /* 7 - INV | DNML | DZ */
|
|
|
|
FPE_FLTOVF, /* 8 - OFL */
|
|
|
|
FPE_FLTINV, /* 9 - INV | OFL */
|
|
|
|
FPE_FLTUND, /* A - DNML | OFL */
|
|
|
|
FPE_FLTINV, /* B - INV | DNML | OFL */
|
|
|
|
FPE_FLTDIV, /* C - DZ | OFL */
|
|
|
|
FPE_FLTINV, /* D - INV | DZ | OFL */
|
|
|
|
FPE_FLTDIV, /* E - DNML | DZ | OFL */
|
|
|
|
FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
|
|
|
|
FPE_FLTUND, /* 10 - UFL */
|
|
|
|
FPE_FLTINV, /* 11 - INV | UFL */
|
|
|
|
FPE_FLTUND, /* 12 - DNML | UFL */
|
|
|
|
FPE_FLTINV, /* 13 - INV | DNML | UFL */
|
|
|
|
FPE_FLTDIV, /* 14 - DZ | UFL */
|
|
|
|
FPE_FLTINV, /* 15 - INV | DZ | UFL */
|
|
|
|
FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
|
|
|
|
FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
|
|
|
|
FPE_FLTOVF, /* 18 - OFL | UFL */
|
|
|
|
FPE_FLTINV, /* 19 - INV | OFL | UFL */
|
|
|
|
FPE_FLTUND, /* 1A - DNML | OFL | UFL */
|
|
|
|
FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
|
|
|
|
FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
|
|
|
|
FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
|
|
|
|
FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
|
|
|
|
FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
|
|
|
|
FPE_FLTRES, /* 20 - IMP */
|
|
|
|
FPE_FLTINV, /* 21 - INV | IMP */
|
|
|
|
FPE_FLTUND, /* 22 - DNML | IMP */
|
|
|
|
FPE_FLTINV, /* 23 - INV | DNML | IMP */
|
|
|
|
FPE_FLTDIV, /* 24 - DZ | IMP */
|
|
|
|
FPE_FLTINV, /* 25 - INV | DZ | IMP */
|
|
|
|
FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
|
|
|
|
FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
|
|
|
|
FPE_FLTOVF, /* 28 - OFL | IMP */
|
|
|
|
FPE_FLTINV, /* 29 - INV | OFL | IMP */
|
|
|
|
FPE_FLTUND, /* 2A - DNML | OFL | IMP */
|
|
|
|
FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
|
|
|
|
FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
|
|
|
|
FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
|
|
|
|
FPE_FLTUND, /* 30 - UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 31 - INV | UFL | IMP */
|
|
|
|
FPE_FLTUND, /* 32 - DNML | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
|
|
|
|
FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
|
|
|
|
FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
|
|
|
|
FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
|
|
|
|
FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
|
|
|
|
FPE_FLTSUB, /* 40 - STK */
|
|
|
|
FPE_FLTSUB, /* 41 - INV | STK */
|
|
|
|
FPE_FLTUND, /* 42 - DNML | STK */
|
|
|
|
FPE_FLTSUB, /* 43 - INV | DNML | STK */
|
|
|
|
FPE_FLTDIV, /* 44 - DZ | STK */
|
|
|
|
FPE_FLTSUB, /* 45 - INV | DZ | STK */
|
|
|
|
FPE_FLTDIV, /* 46 - DNML | DZ | STK */
|
|
|
|
FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
|
|
|
|
FPE_FLTOVF, /* 48 - OFL | STK */
|
|
|
|
FPE_FLTSUB, /* 49 - INV | OFL | STK */
|
|
|
|
FPE_FLTUND, /* 4A - DNML | OFL | STK */
|
|
|
|
FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
|
|
|
|
FPE_FLTDIV, /* 4C - DZ | OFL | STK */
|
|
|
|
FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
|
|
|
|
FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
|
|
|
|
FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
|
|
|
|
FPE_FLTUND, /* 50 - UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 51 - INV | UFL | STK */
|
|
|
|
FPE_FLTUND, /* 52 - DNML | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
|
|
|
|
FPE_FLTDIV, /* 54 - DZ | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
|
|
|
|
FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
|
|
|
|
FPE_FLTOVF, /* 58 - OFL | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
|
|
|
|
FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
|
|
|
|
FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
|
|
|
|
FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
|
|
|
|
FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
|
|
|
|
FPE_FLTRES, /* 60 - IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 61 - INV | IMP | STK */
|
|
|
|
FPE_FLTUND, /* 62 - DNML | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 64 - DZ | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
|
|
|
|
FPE_FLTOVF, /* 68 - OFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
|
|
|
|
FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
|
|
|
|
FPE_FLTUND, /* 70 - UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
|
|
|
|
FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
|
|
|
|
FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
|
|
|
|
FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
|
1999-07-25 13:16:09 +00:00
|
|
|
};
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
2012-07-21 21:49:05 +00:00
|
|
|
* Read the FP status and control words, then generate si_code value
|
|
|
|
* for SIGFPE. The error code chosen will be one of the
|
|
|
|
* FPE_... macros. It will be sent as the second argument to old
|
|
|
|
* BSD-style signal handlers and as "siginfo_t->si_code" (second
|
|
|
|
* argument) to SA_SIGINFO signal handlers.
|
1995-01-03 04:00:06 +00:00
|
|
|
*
|
2012-07-21 21:49:05 +00:00
|
|
|
* Some time ago, we cleared the x87 exceptions with FNCLEX there.
|
|
|
|
* Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
|
|
|
|
* usermode code which understands the FPU hardware enough to enable
|
|
|
|
* the exceptions, can also handle clearing the exception state in the
|
|
|
|
* handler. The only consequence of not clearing the exception is the
|
|
|
|
* rethrow of the SIGFPE on return from the signal handler and
|
|
|
|
* reexecution of the corresponding instruction.
|
1995-01-03 04:00:06 +00:00
|
|
|
*
|
2012-07-21 21:49:05 +00:00
|
|
|
* For XMM traps, the exceptions were never cleared.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2001-05-22 21:20:49 +00:00
|
|
|
int
|
2012-07-21 21:52:48 +00:00
|
|
|
npxtrap_x87(void)
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
2001-05-22 21:20:49 +00:00
|
|
|
u_short control, status;
|
1993-06-12 14:58:17 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
if (!hw_float) {
|
2012-07-21 21:52:48 +00:00
|
|
|
printf(
|
|
|
|
"npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
|
2010-06-23 11:12:58 +00:00
|
|
|
PCPU_GET(fpcurthread), curthread, hw_float);
|
2001-05-22 21:20:49 +00:00
|
|
|
panic("npxtrap from nowhere");
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
1993-06-12 14:58:17 +00:00
|
|
|
|
|
|
|
/*
|
2001-05-22 21:20:49 +00:00
|
|
|
* Interrupt handling (for another interrupt) may have pushed the
|
|
|
|
* state to memory. Fetch the relevant parts of the state from
|
|
|
|
* wherever they are.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2001-12-11 23:33:44 +00:00
|
|
|
if (PCPU_GET(fpcurthread) != curthread) {
|
2001-09-12 08:38:13 +00:00
|
|
|
control = GET_FPU_CW(curthread);
|
|
|
|
status = GET_FPU_SW(curthread);
|
1993-06-12 14:58:17 +00:00
|
|
|
} else {
|
2001-05-22 21:20:49 +00:00
|
|
|
fnstcw(&control);
|
|
|
|
fnstsw(&status);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
2001-05-22 21:20:49 +00:00
|
|
|
return (fpetable[status & ((~control & 0x3f) | 0x40)]);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
2012-07-21 21:52:48 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
/*
 * Produce the SIGFPE si_code for an SSE trap from the MXCSR value.  The
 * value comes from the hardware when curthread is fpcurthread, and from the
 * saved sv_xmm environment in the thread's pcb otherwise.  Only the low six
 * bits whose counterpart seven bits higher is clear select the fpetable slot.
 */
int
npxtrap_sse(void)
{
	u_int mxcsr;

	if (!hw_float) {
		printf(
	"npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}

	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread)
		stmxcsr(&mxcsr);
	else
		mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
	critical_exit();

	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
#endif
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
/*
|
|
|
|
* Implement device not available (DNA) exception
|
|
|
|
*
|
2001-12-11 23:33:44 +00:00
|
|
|
* It would be better to switch FP context here (if curthread != fpcurthread)
|
1995-01-03 04:00:06 +00:00
|
|
|
* and not necessarily for every context switch, but it is too hard to
|
|
|
|
* access foreign pcb's.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2002-09-16 19:25:59 +00:00
|
|
|
|
|
|
|
static int err_count = 0;
|
|
|
|
|
1993-06-12 14:58:17 +00:00
|
|
|
int
|
2009-03-05 18:32:43 +00:00
|
|
|
npxdna(void)
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
2001-02-01 03:34:20 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
if (!hw_float)
|
1993-06-12 14:58:17 +00:00
|
|
|
return (0);
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
2002-09-16 19:25:59 +00:00
|
|
|
if (PCPU_GET(fpcurthread) == curthread) {
|
|
|
|
printf("npxdna: fpcurthread == curthread %d times\n",
|
|
|
|
++err_count);
|
|
|
|
stop_emulating();
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
2002-09-16 19:25:59 +00:00
|
|
|
return (1);
|
|
|
|
}
|
2001-12-11 23:33:44 +00:00
|
|
|
if (PCPU_GET(fpcurthread) != NULL) {
|
2002-09-16 19:25:59 +00:00
|
|
|
printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
|
|
|
|
PCPU_GET(fpcurthread),
|
|
|
|
PCPU_GET(fpcurthread)->td_proc->p_pid,
|
|
|
|
curthread, curthread->td_proc->p_pid);
|
1993-06-12 14:58:17 +00:00
|
|
|
panic("npxdna");
|
|
|
|
}
|
|
|
|
stop_emulating();
|
|
|
|
/*
|
2014-11-02 22:58:30 +00:00
|
|
|
* Record new context early in case frstor causes a trap.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
2001-12-11 23:33:44 +00:00
|
|
|
PCPU_SET(fpcurthread, curthread);
|
2001-07-12 06:32:51 +00:00
|
|
|
|
2009-03-25 22:08:30 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (cpu_fxsr)
|
|
|
|
fpu_clean_state();
|
|
|
|
#endif
|
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
|
2002-09-16 19:25:59 +00:00
|
|
|
/*
|
|
|
|
* This is the first time this thread has used the FPU or
|
|
|
|
* the PCB doesn't contain a clean FPU state. Explicitly
|
2009-03-25 14:17:08 +00:00
|
|
|
* load an initial state.
|
2014-11-02 22:58:30 +00:00
|
|
|
*
|
|
|
|
* We prefer to restore the state from the actual save
|
|
|
|
* area in PCB instead of directly loading from
|
|
|
|
* npx_initialstate, to ignite the XSAVEOPT
|
|
|
|
* tracking engine.
|
2002-09-16 19:25:59 +00:00
|
|
|
*/
|
2014-11-02 22:58:30 +00:00
|
|
|
bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size);
|
|
|
|
fpurstor(curpcb->pcb_save);
|
2012-07-26 09:11:37 +00:00
|
|
|
if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
|
|
|
|
fldcw(curpcb->pcb_initial_npxcw);
|
|
|
|
curpcb->pcb_flags |= PCB_NPXINITDONE;
|
|
|
|
if (PCB_USER_FPU(curpcb))
|
|
|
|
curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
|
2002-09-16 19:25:59 +00:00
|
|
|
} else {
|
2012-07-26 09:11:37 +00:00
|
|
|
fpurstor(curpcb->pcb_save);
|
2002-09-16 19:25:59 +00:00
|
|
|
}
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
1993-06-12 14:58:17 +00:00
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-11-02 22:58:30 +00:00
|
|
|
* Wrapper for fpusave() called from context switch routines.
|
2001-05-20 20:04:40 +00:00
|
|
|
*
|
|
|
|
* npxsave() must be called with interrupts disabled, so that it clears
|
2001-12-11 23:33:44 +00:00
|
|
|
* fpcurthread atomically with saving the state. We require callers to do the
|
2001-05-20 20:04:40 +00:00
|
|
|
* disabling, since most callers need to disable interrupts anyway to call
|
2001-12-11 23:33:44 +00:00
|
|
|
* npxsave() atomically with checking fpcurthread.
|
1993-06-12 14:58:17 +00:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
npxsave(addr)
|
2001-07-12 06:32:51 +00:00
|
|
|
union savefpu *addr;
|
1993-06-12 14:58:17 +00:00
|
|
|
{
|
1997-07-21 07:57:50 +00:00
|
|
|
|
|
|
|
stop_emulating();
|
2014-11-02 22:58:30 +00:00
|
|
|
if (use_xsaveopt)
|
|
|
|
xsaveopt((char *)addr, xsave_mask);
|
|
|
|
else
|
|
|
|
fpusave(addr);
|
1997-07-21 07:57:50 +00:00
|
|
|
start_emulating();
|
2001-12-11 23:33:44 +00:00
|
|
|
PCPU_SET(fpcurthread, NULL);
|
1993-06-12 14:58:17 +00:00
|
|
|
}
|
|
|
|
|
2014-08-30 17:48:38 +00:00
|
|
|
/*
|
|
|
|
* Unconditionally save the current co-processor state across suspend and
|
|
|
|
* resume.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
npxsuspend(union savefpu *addr)
|
|
|
|
{
|
|
|
|
register_t cr0;
|
|
|
|
|
|
|
|
if (!hw_float)
|
|
|
|
return;
|
|
|
|
if (PCPU_GET(fpcurthread) == NULL) {
|
2014-11-02 22:58:30 +00:00
|
|
|
bcopy(npx_initialstate, addr, cpu_max_ext_state_size);
|
2014-08-30 17:48:38 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
cr0 = rcr0();
|
2014-11-02 22:58:30 +00:00
|
|
|
stop_emulating();
|
2014-08-30 17:48:38 +00:00
|
|
|
fpusave(addr);
|
|
|
|
load_cr0(cr0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
npxresume(union savefpu *addr)
|
|
|
|
{
|
|
|
|
register_t cr0;
|
|
|
|
|
|
|
|
if (!hw_float)
|
|
|
|
return;
|
|
|
|
|
|
|
|
cr0 = rcr0();
|
2014-11-02 22:58:30 +00:00
|
|
|
npxinit(false);
|
2014-08-30 17:48:38 +00:00
|
|
|
stop_emulating();
|
|
|
|
fpurstor(addr);
|
|
|
|
load_cr0(cr0);
|
|
|
|
}
|
|
|
|
|
2002-09-16 19:25:59 +00:00
|
|
|
void
|
|
|
|
npxdrop()
|
|
|
|
{
|
|
|
|
struct thread *td;
|
|
|
|
|
2004-06-18 02:10:55 +00:00
|
|
|
/*
|
|
|
|
* Discard pending exceptions in the !cpu_fxsr case so that unmasked
|
|
|
|
* ones don't cause a panic on the next frstor.
|
|
|
|
*/
|
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (!cpu_fxsr)
|
|
|
|
#endif
|
|
|
|
fnclex();
|
|
|
|
|
2002-09-16 19:25:59 +00:00
|
|
|
td = PCPU_GET(fpcurthread);
|
2010-06-23 11:21:19 +00:00
|
|
|
KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
|
|
|
|
CRITICAL_ASSERT(td);
|
2002-09-16 19:25:59 +00:00
|
|
|
PCPU_SET(fpcurthread, NULL);
|
|
|
|
td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
|
|
|
|
start_emulating();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2010-11-26 14:50:42 +00:00
|
|
|
* Get the user state of the FPU into pcb->pcb_user_save without
|
|
|
|
* dropping ownership (if possible). It returns the FPU ownership
|
|
|
|
* status.
|
2002-09-16 19:25:59 +00:00
|
|
|
*/
|
|
|
|
int
|
2010-11-26 14:50:42 +00:00
|
|
|
npxgetregs(struct thread *td)
|
2002-09-16 19:25:59 +00:00
|
|
|
{
|
2010-06-05 15:59:59 +00:00
|
|
|
struct pcb *pcb;
|
2014-11-02 22:58:30 +00:00
|
|
|
uint64_t *xstate_bv, bit;
|
|
|
|
char *sa;
|
|
|
|
int max_ext_n, i, owned;
|
2002-09-16 19:25:59 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
if (!hw_float)
|
2002-09-16 19:25:59 +00:00
|
|
|
return (_MC_FPOWNED_NONE);
|
|
|
|
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb = td->td_pcb;
|
|
|
|
if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
|
2014-11-02 22:58:30 +00:00
|
|
|
bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb),
|
|
|
|
cpu_max_ext_state_size);
|
|
|
|
SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw);
|
2010-11-26 14:50:42 +00:00
|
|
|
npxuserinited(td);
|
2010-06-05 15:59:59 +00:00
|
|
|
return (_MC_FPOWNED_PCB);
|
|
|
|
}
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
2010-11-26 14:50:42 +00:00
|
|
|
if (td == PCPU_GET(fpcurthread)) {
|
2014-11-02 22:58:30 +00:00
|
|
|
fpusave(get_pcb_user_save_pcb(pcb));
|
2010-06-05 15:59:59 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (!cpu_fxsr)
|
|
|
|
#endif
|
|
|
|
/*
|
|
|
|
* fnsave initializes the FPU and destroys whatever
|
|
|
|
* context it contains. Make sure the FPU owner
|
|
|
|
* starts with a clean state next time.
|
|
|
|
*/
|
|
|
|
npxdrop();
|
2014-11-02 22:58:30 +00:00
|
|
|
owned = _MC_FPOWNED_FPU;
|
2010-06-05 15:59:59 +00:00
|
|
|
} else {
|
2014-11-02 22:58:30 +00:00
|
|
|
owned = _MC_FPOWNED_PCB;
|
|
|
|
}
|
|
|
|
critical_exit();
|
|
|
|
if (use_xsave) {
|
|
|
|
/*
|
|
|
|
* Handle partially saved state.
|
|
|
|
*/
|
|
|
|
sa = (char *)get_pcb_user_save_pcb(pcb);
|
|
|
|
xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) +
|
|
|
|
offsetof(struct xstate_hdr, xstate_bv));
|
|
|
|
if (xsave_mask >> 32 != 0)
|
|
|
|
max_ext_n = fls(xsave_mask >> 32) + 32;
|
|
|
|
else
|
|
|
|
max_ext_n = fls(xsave_mask);
|
|
|
|
for (i = 0; i < max_ext_n; i++) {
|
|
|
|
bit = 1ULL << i;
|
|
|
|
if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
|
|
|
|
continue;
|
|
|
|
bcopy((char *)npx_initialstate +
|
|
|
|
xsave_area_desc[i].offset,
|
|
|
|
sa + xsave_area_desc[i].offset,
|
|
|
|
xsave_area_desc[i].size);
|
|
|
|
*xstate_bv |= bit;
|
|
|
|
}
|
2002-09-16 19:25:59 +00:00
|
|
|
}
|
2014-11-02 22:58:30 +00:00
|
|
|
return (owned);
|
2002-09-16 19:25:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2010-11-26 14:50:42 +00:00
|
|
|
npxuserinited(struct thread *td)
|
2002-09-16 19:25:59 +00:00
|
|
|
{
|
2010-06-05 15:59:59 +00:00
|
|
|
struct pcb *pcb;
|
2002-09-16 19:25:59 +00:00
|
|
|
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb = td->td_pcb;
|
|
|
|
if (PCB_USER_FPU(pcb))
|
2010-11-26 14:50:42 +00:00
|
|
|
pcb->pcb_flags |= PCB_NPXINITDONE;
|
|
|
|
pcb->pcb_flags |= PCB_NPXUSERINITDONE;
|
2010-06-05 15:59:59 +00:00
|
|
|
}
|
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
int
|
|
|
|
npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
|
|
|
|
{
|
|
|
|
struct xstate_hdr *hdr, *ehdr;
|
|
|
|
size_t len, max_len;
|
|
|
|
uint64_t bv;
|
2010-11-26 14:50:42 +00:00
|
|
|
|
2014-11-02 22:58:30 +00:00
|
|
|
/* XXXKIB should we clear all extended state in xstate_bv instead ? */
|
|
|
|
if (xfpustate == NULL)
|
|
|
|
return (0);
|
|
|
|
if (!use_xsave)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
|
|
|
|
len = xfpustate_size;
|
|
|
|
if (len < sizeof(struct xstate_hdr))
|
|
|
|
return (EINVAL);
|
|
|
|
max_len = cpu_max_ext_state_size - sizeof(union savefpu);
|
|
|
|
if (len > max_len)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
ehdr = (struct xstate_hdr *)xfpustate;
|
|
|
|
bv = ehdr->xstate_bv;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Avoid #gp.
|
|
|
|
*/
|
|
|
|
if (bv & ~xsave_mask)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);
|
|
|
|
|
|
|
|
hdr->xstate_bv = bv;
|
|
|
|
bcopy(xfpustate + sizeof(struct xstate_hdr),
|
|
|
|
(char *)(hdr + 1), len - sizeof(struct xstate_hdr));
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
|
|
|
|
size_t xfpustate_size)
|
2010-06-05 15:59:59 +00:00
|
|
|
{
|
|
|
|
struct pcb *pcb;
|
2014-11-02 22:58:30 +00:00
|
|
|
int error;
|
2010-06-05 15:59:59 +00:00
|
|
|
|
2010-06-23 11:12:58 +00:00
|
|
|
if (!hw_float)
|
2014-11-02 22:58:30 +00:00
|
|
|
return (ENXIO);
|
2010-06-05 15:59:59 +00:00
|
|
|
|
|
|
|
pcb = td->td_pcb;
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
2010-06-05 15:59:59 +00:00
|
|
|
if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
|
2014-11-02 22:58:30 +00:00
|
|
|
error = npxsetxstate(td, xfpustate, xfpustate_size);
|
|
|
|
if (error != 0) {
|
|
|
|
critical_exit();
|
|
|
|
return (error);
|
|
|
|
}
|
2010-06-05 15:59:59 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
|
|
|
|
if (!cpu_fxsr)
|
|
|
|
#endif
|
|
|
|
fnclex(); /* As in npxdrop(). */
|
2014-11-02 22:58:30 +00:00
|
|
|
bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
|
|
|
|
fpurstor(get_pcb_user_save_td(td));
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
|
|
|
|
} else {
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
2014-11-02 22:58:30 +00:00
|
|
|
error = npxsetxstate(td, xfpustate, xfpustate_size);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
|
2010-11-26 14:50:42 +00:00
|
|
|
npxuserinited(td);
|
2002-09-16 19:25:59 +00:00
|
|
|
}
|
2014-11-02 22:58:30 +00:00
|
|
|
return (0);
|
2002-09-16 19:25:59 +00:00
|
|
|
}
|
|
|
|
|
2001-07-12 06:32:51 +00:00
|
|
|
/*
 * Save the FPU state to 'addr'.  Built with CPU_ENABLE_SSE, xsave() under
 * xsave_mask is used when use_xsave is set and fxsave() when cpu_fxsr is
 * set; in every other case fnsave() is used.
 */
static void
fpusave(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (use_xsave)
		xsave((char *)addr, xsave_mask);
	else if (cpu_fxsr)
		fxsave(addr);
	else
#endif
		fnsave(addr);
}
|
|
|
|
|
2006-04-19 07:00:19 +00:00
|
|
|
#ifdef CPU_ENABLE_SSE
/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * To keep that information from leaking across processes, those values
 * are scrubbed with a dummy load before fxrstor() is executed.
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short sw;

	/*
	 * If the ES bit in the x87 status word is currently set, clear it
	 * so that the upcoming load does not fault.
	 */
	fnstsw(&sw);
	if ((sw & 0x80) != 0)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
#endif /* CPU_ENABLE_SSE */
|
|
|
|
|
2001-07-12 06:32:51 +00:00
|
|
|
/*
 * Load the FPU state from 'addr'.  Built with CPU_ENABLE_SSE, xrstor()
 * under xsave_mask is used when use_xsave is set and fxrstor() when
 * cpu_fxsr is set; in every other case frstor() is used.
 */
static void
fpurstor(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (use_xsave)
		xrstor((char *)addr, xsave_mask);
	else if (cpu_fxsr)
		fxrstor(addr);
	else
#endif
		frstor(addr);
}
|
|
|
|
|
2004-02-13 18:04:51 +00:00
|
|
|
#ifdef DEV_ISA
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
 */
static struct isa_pnp_id npxisa_ids[] = {
	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
	{ 0 }
};

/*
 * Probe: match the device against npxisa_ids through the parent bus and
 * quiet the device when the result is zero or negative.
 */
static int
npxisa_probe(device_t dev)
{
	int rv;

	rv = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids);
	if (rv <= 0)
		device_quiet(dev);
	return (rv);
}

/* Attach: nothing to set up; always reports success. */
static int
npxisa_attach(device_t dev)
{

	return (0);
}

static device_method_t npxisa_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		npxisa_probe),
	DEVMETHOD(device_attach,	npxisa_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	{ 0, 0 }
};

static driver_t npxisa_driver = {
	"npxisa",
	npxisa_methods,
	1,			/* no softc */
};

static devclass_t npxisa_devclass;

/* Hook the driver under the isa bus and, except on PC98, under acpi. */
DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
#ifndef PC98
DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
#endif
#endif /* DEV_ISA */
|
2010-06-05 15:59:59 +00:00
|
|
|
|
Add support for the extended FPU states on amd64, both for native
64bit and 32bit ABIs. As a side-effect, it enables AVX on capable
CPUs.
In particular:
- Query the CPU support for XSAVE, list of the supported extensions
and the required size of FPU save area. The hw.use_xsave tunable is
provided for disabling XSAVE, and hw.xsave_mask may be used to
select the enabled extensions.
- Remove the FPU save area from PCB and dynamically allocate the
(run-time sized) user save area on the top of the kernel stack,
right above the PCB. Reorganize the thread0 PCB initialization to
postpone it after BSP is queried for save area size.
- The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as
well. FPU state is only useful for suspend, where it is saved in
dynamically allocated suspfpusave area.
- Use XSAVE and XRSTOR to save/restore FPU state, if supported and
enabled.
- Define new mcontext_t flag _MC_HASFPXSTATE, indicating that
mcontext_t has a valid pointer to out-of-struct extended FPU
state. Signal handlers are supplied with stack-allocated fpu
state. The sigreturn(2) and setcontext(2) syscall honour the flag,
allowing the signal handlers to inspect and manipilate extended
state in the interrupted context.
- The getcontext(2) never returns extended state, since there is no
place in the fixed-sized mcontext_t to place variable-sized save
area. And, since mcontext_t is embedded into ucontext_t, makes it
impossible to fix in a reasonable way. Instead of extending
getcontext(2) syscall, provide a sysarch(2) facility to query
extended FPU state.
- Add ptrace(2) support for getting and setting extended state; while
there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries.
- Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to
consumers, making it opaque. Internally, struct fpu_kern_ctx now
contains a space for the extended state. Convert in-kernel consumers
of fpu_kern KPI both on i386 and amd64.
First version of the support for AVX was submitted by Tim Bird
<tim.bird am sony com> on behalf of Sony. This version was written
from scratch.
Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org>
MFC after: 1 month
2012-01-21 17:45:27 +00:00
|
|
|
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
|
|
|
|
"Kernel contexts for FPU state");
|
|
|
|
|
|
|
|
/*
 * fpu_kern_ctx.flags bit: recorded by fpu_kern_enter() when the pcb had
 * PCB_NPXINITDONE set on entry; consulted by fpu_kern_leave() when it
 * restores that pcb flag for a nested kernel FPU section.
 */
#define FPU_KERN_CTX_NPXINITDONE 0x01
|
2014-06-23 07:37:54 +00:00
|
|
|
/*
 * fpu_kern_ctx.flags bit: set by fpu_kern_enter() when FPU_KERN_KTHR was
 * requested and is_fpu_kern_thread() is true, in which case no state is
 * switched; fpu_kern_leave() returns early when it sees this bit.
 */
#define FPU_KERN_CTX_DUMMY 0x02
|
Add support for the extended FPU states on amd64, both for native
64bit and 32bit ABIs. As a side-effect, it enables AVX on capable
CPUs.
In particular:
- Query the CPU support for XSAVE, list of the supported extensions
and the required size of FPU save area. The hw.use_xsave tunable is
provided for disabling XSAVE, and hw.xsave_mask may be used to
select the enabled extensions.
- Remove the FPU save area from PCB and dynamically allocate the
(run-time sized) user save area on the top of the kernel stack,
right above the PCB. Reorganize the thread0 PCB initialization to
postpone it after BSP is queried for save area size.
- The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as
well. FPU state is only useful for suspend, where it is saved in
dynamically allocated suspfpusave area.
- Use XSAVE and XRSTOR to save/restore FPU state, if supported and
enabled.
- Define new mcontext_t flag _MC_HASFPXSTATE, indicating that
mcontext_t has a valid pointer to out-of-struct extended FPU
state. Signal handlers are supplied with stack-allocated fpu
state. The sigreturn(2) and setcontext(2) syscall honour the flag,
allowing the signal handlers to inspect and manipulate extended
state in the interrupted context.
- The getcontext(2) never returns extended state, since there is no
place in the fixed-sized mcontext_t to place variable-sized save
area. And, since mcontext_t is embedded into ucontext_t, makes it
impossible to fix in a reasonable way. Instead of extending
getcontext(2) syscall, provide a sysarch(2) facility to query
extended FPU state.
- Add ptrace(2) support for getting and setting extended state; while
there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries.
- Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to
consumers, making it opaque. Internally, struct fpu_kern_ctx now
contains a space for the extended state. Convert in-kernel consumers
of fpu_kern KPI both on i386 and amd64.
First version of the support for AVX was submitted by Tim Bird
<tim.bird am sony com> on behalf of Sony. This version was written
from scratch.
Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org>
MFC after: 1 month
2012-01-21 17:45:27 +00:00
|
|
|
|
|
|
|
struct fpu_kern_ctx {
|
|
|
|
union savefpu *prev;
|
|
|
|
uint32_t flags;
|
|
|
|
char hwstate1[];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct fpu_kern_ctx *
|
|
|
|
fpu_kern_alloc_ctx(u_int flags)
|
|
|
|
{
|
|
|
|
struct fpu_kern_ctx *res;
|
|
|
|
size_t sz;
|
|
|
|
|
|
|
|
sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
|
2014-11-02 22:58:30 +00:00
|
|
|
cpu_max_ext_state_size;
|
Add support for the extended FPU states on amd64, both for native
64bit and 32bit ABIs. As a side-effect, it enables AVX on capable
CPUs.
In particular:
- Query the CPU support for XSAVE, list of the supported extensions
and the required size of FPU save area. The hw.use_xsave tunable is
provided for disabling XSAVE, and hw.xsave_mask may be used to
select the enabled extensions.
- Remove the FPU save area from PCB and dynamically allocate the
(run-time sized) user save area on the top of the kernel stack,
right above the PCB. Reorganize the thread0 PCB initialization to
postpone it after BSP is queried for save area size.
- The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as
well. FPU state is only useful for suspend, where it is saved in
dynamically allocated suspfpusave area.
- Use XSAVE and XRSTOR to save/restore FPU state, if supported and
enabled.
- Define new mcontext_t flag _MC_HASFPXSTATE, indicating that
mcontext_t has a valid pointer to out-of-struct extended FPU
state. Signal handlers are supplied with stack-allocated fpu
state. The sigreturn(2) and setcontext(2) syscall honour the flag,
allowing the signal handlers to inspect and manipilate extended
state in the interrupted context.
- The getcontext(2) never returns extended state, since there is no
place in the fixed-sized mcontext_t to place variable-sized save
area. And, since mcontext_t is embedded into ucontext_t, makes it
impossible to fix in a reasonable way. Instead of extending
getcontext(2) syscall, provide a sysarch(2) facility to query
extended FPU state.
- Add ptrace(2) support for getting and setting extended state; while
there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries.
- Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to
consumers, making it opaque. Internally, struct fpu_kern_ctx now
contains a space for the extended state. Convert in-kernel consumers
of fpu_kern KPI both on i386 and amd64.
First version of the support for AVX was submitted by Tim Bird
<tim.bird am sony com> on behalf of Sony. This version was written
from scratch.
Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org>
MFC after: 1 month
2012-01-21 17:45:27 +00:00
|
|
|
res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
|
|
|
|
M_NOWAIT : M_WAITOK) | M_ZERO);
|
|
|
|
return (res);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
|
|
|
|
{
|
|
|
|
|
|
|
|
/* XXXKIB clear the memory ? */
|
|
|
|
free(ctx, M_FPUKERN_CTX);
|
|
|
|
}
|
|
|
|
|
|
|
|
static union savefpu *
|
|
|
|
fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
|
|
|
|
{
|
|
|
|
vm_offset_t p;
|
|
|
|
|
|
|
|
p = (vm_offset_t)&ctx->hwstate1;
|
|
|
|
p = roundup2(p, XSAVE_AREA_ALIGN);
|
|
|
|
return ((union savefpu *)p);
|
|
|
|
}
|
|
|
|
|
2010-06-05 15:59:59 +00:00
|
|
|
int
|
|
|
|
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
|
|
|
|
{
|
|
|
|
struct pcb *pcb;
|
|
|
|
|
2014-06-23 07:37:54 +00:00
|
|
|
if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
|
|
|
|
ctx->flags = FPU_KERN_CTX_DUMMY;
|
|
|
|
return (0);
|
|
|
|
}
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb = td->td_pcb;
|
2014-11-02 22:58:30 +00:00
|
|
|
KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
|
|
|
|
get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
|
2010-06-05 15:59:59 +00:00
|
|
|
ctx->flags = 0;
|
|
|
|
if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
|
|
|
|
ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
|
|
|
|
npxexit(td);
|
|
|
|
ctx->prev = pcb->pcb_save;
|
Add support for the extended FPU states on amd64, both for native
64bit and 32bit ABIs. As a side-effect, it enables AVX on capable
CPUs.
In particular:
- Query the CPU support for XSAVE, list of the supported extensions
and the required size of FPU save area. The hw.use_xsave tunable is
provided for disabling XSAVE, and hw.xsave_mask may be used to
select the enabled extensions.
- Remove the FPU save area from PCB and dynamically allocate the
(run-time sized) user save area on the top of the kernel stack,
right above the PCB. Reorganize the thread0 PCB initialization to
postpone it after BSP is queried for save area size.
- The dumppcb, stoppcbs and susppcbs now do not carry the FPU state as
well. FPU state is only useful for suspend, where it is saved in
dynamically allocated suspfpusave area.
- Use XSAVE and XRSTOR to save/restore FPU state, if supported and
enabled.
- Define new mcontext_t flag _MC_HASFPXSTATE, indicating that
mcontext_t has a valid pointer to out-of-struct extended FPU
state. Signal handlers are supplied with stack-allocated fpu
state. The sigreturn(2) and setcontext(2) syscall honour the flag,
allowing the signal handlers to inspect and manipilate extended
state in the interrupted context.
- The getcontext(2) never returns extended state, since there is no
place in the fixed-sized mcontext_t to place variable-sized save
area. And, since mcontext_t is embedded into ucontext_t, makes it
impossible to fix in a reasonable way. Instead of extending
getcontext(2) syscall, provide a sysarch(2) facility to query
extended FPU state.
- Add ptrace(2) support for getting and setting extended state; while
there, implement missed PT_I386_{GET,SET}XMMREGS for 32bit binaries.
- Change fpu_kern KPI to not expose struct fpu_kern_ctx layout to
consumers, making it opaque. Internally, struct fpu_kern_ctx now
contains a space for the extended state. Convert in-kernel consumers
of fpu_kern KPI both on i386 and amd64.
First version of the support for AVX was submitted by Tim Bird
<tim.bird am sony com> on behalf of Sony. This version was written
from scratch.
Tested by: pho (previous version), Yamagi Burmeister <lists yamagi org>
MFC after: 1 month
2012-01-21 17:45:27 +00:00
|
|
|
pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb->pcb_flags |= PCB_KERNNPX;
|
|
|
|
pcb->pcb_flags &= ~PCB_NPXINITDONE;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
|
|
|
|
{
|
|
|
|
struct pcb *pcb;
|
|
|
|
|
2014-06-23 07:37:54 +00:00
|
|
|
if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
|
|
|
|
return (0);
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb = td->td_pcb;
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_enter();
|
2010-06-05 15:59:59 +00:00
|
|
|
if (curthread == PCPU_GET(fpcurthread))
|
|
|
|
npxdrop();
|
2010-06-23 11:21:19 +00:00
|
|
|
critical_exit();
|
2010-06-05 15:59:59 +00:00
|
|
|
pcb->pcb_save = ctx->prev;
|
2014-11-02 22:58:30 +00:00
|
|
|
if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
|
2010-06-05 15:59:59 +00:00
|
|
|
if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
|
|
|
|
pcb->pcb_flags |= PCB_NPXINITDONE;
|
|
|
|
else
|
|
|
|
pcb->pcb_flags &= ~PCB_NPXINITDONE;
|
|
|
|
pcb->pcb_flags &= ~PCB_KERNNPX;
|
|
|
|
} else {
|
|
|
|
if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
|
|
|
|
pcb->pcb_flags |= PCB_NPXINITDONE;
|
|
|
|
else
|
|
|
|
pcb->pcb_flags &= ~PCB_NPXINITDONE;
|
|
|
|
KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
fpu_kern_thread(u_int flags)
|
|
|
|
{
|
|
|
|
struct pcb *pcb;
|
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
pcb = curpcb;
|
2010-06-05 15:59:59 +00:00
|
|
|
KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
|
|
|
|
("Only kthread may use fpu_kern_thread"));
|
2014-11-02 22:58:30 +00:00
|
|
|
KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
|
2012-07-26 09:11:37 +00:00
|
|
|
("mangled pcb_save"));
|
|
|
|
KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
|
2010-06-05 15:59:59 +00:00
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
curpcb->pcb_flags |= PCB_KERNNPX;
|
2010-06-05 15:59:59 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
is_fpu_kern_thread(u_int flags)
|
|
|
|
{
|
|
|
|
|
|
|
|
if ((curthread->td_pflags & TDP_KTHREAD) == 0)
|
|
|
|
return (0);
|
2012-07-26 09:11:37 +00:00
|
|
|
return ((curpcb->pcb_flags & PCB_KERNNPX) != 0);
|
2010-06-05 15:59:59 +00:00
|
|
|
}
|
2014-11-02 22:58:30 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* FPU save area alloc/free/init utility routines
|
|
|
|
*/
|
|
|
|
union savefpu *
|
|
|
|
fpu_save_area_alloc(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (uma_zalloc(fpu_save_area_zone, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fpu_save_area_free(union savefpu *fsa)
|
|
|
|
{
|
|
|
|
|
|
|
|
uma_zfree(fpu_save_area_zone, fsa);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
fpu_save_area_reset(union savefpu *fsa)
|
|
|
|
{
|
|
|
|
|
|
|
|
bcopy(npx_initialstate, fsa, cpu_max_ext_state_size);
|
|
|
|
}
|