9233b45ad9
floating-point arithmetic on i386.

Now I'm going to make excuses for why this code is kinda scary:

- To avoid breaking the ABI with 5.3-RELEASE, we can't change sizeof(fenv_t). I stuck the saved mxcsr in some discontiguous reserved bits in the existing structure.
- Attempting to access the mxcsr on older processors results in an illegal instruction exception, so support for SSE must be detected at runtime. (The extra baggage is optimized away if either the application or libm is compiled with -msse{,2}.)

I didn't run tests to ensure that this doesn't SIGILL on older 486s lacking the cpuid instruction or on other processors lacking SSE. Results from running the fenv regression test on these processors would be appreciated. (You'll need to compile the test with -DNO_STRICT_DFL_ENV.) If you have an 80386, or if your processor supports SSE but the kernel didn't enable it, then you're probably out of luck.

Also, I un-inlined some of the functions that grew larger as a result of this change, moving them from fenv.h to fenv.c.
212 lines
4.6 KiB
C
212 lines
4.6 KiB
C
/*-
|
|
* Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
#include <sys/types.h>
|
|
#include <machine/npx.h>
|
|
#include "fenv.h"
|
|
|
|
const fenv_t __fe_dfl_env = {
|
|
__INITIAL_NPXCW__,
|
|
0x0000,
|
|
0x0000,
|
|
0x1f80,
|
|
0xffffffff,
|
|
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }
|
|
};
|
|
|
|
enum __sse_support __has_sse =
|
|
#ifdef __SSE__
|
|
__SSE_YES;
|
|
#else
|
|
__SSE_UNK;
|
|
#endif
|
|
|
|
/* Copy the current EFLAGS register into *x by way of the stack. */
#define	getfl(x)	__asm __volatile("pushfl\n\t"			\
					 "popl %0"			\
					 : "=mr" (*(x)))
|
|
/* Load the EFLAGS register from the value x by way of the stack. */
#define	setfl(x)	__asm __volatile("pushl %0\n\t"			\
					 "popfl"			\
					 : : "g" (x))
|
|
/*
 * Run CPUID with %eax = 1 and store the resulting %edx into *x.
 * %ebx is saved and restored by hand around the instruction instead of
 * being declared as a clobber; %eax and %ecx are declared clobbered.
 */
#define	cpuid_dx(x)	__asm __volatile("pushl %%ebx\n\t"		\
					 "movl $1, %%eax\n\t"		\
					 "cpuid\n\t"			\
					 "popl %%ebx"			\
					 : "=d" (*(x))			\
					 : : "eax", "ecx")
|
|
|
|
/*
|
|
* Test for SSE support on this processor. We need to do this because
|
|
* we need to use ldmxcsr/stmxcsr to get correct results if any part
|
|
* of the program was compiled to use SSE floating-point, but we can't
|
|
* use SSE on older processors.
|
|
*/
|
|
int
|
|
__test_sse(void)
|
|
{
|
|
int flag, nflag;
|
|
int dx_features;
|
|
|
|
/* Am I a 486? */
|
|
getfl(&flag);
|
|
nflag = flag ^ 0x200000;
|
|
setfl(nflag);
|
|
getfl(&nflag);
|
|
if (flag != nflag) {
|
|
/* Not a 486, so CPUID should work. */
|
|
cpuid_dx(&dx_features);
|
|
if (dx_features & 0x2000000) {
|
|
__has_sse = __SSE_YES;
|
|
return (1);
|
|
}
|
|
}
|
|
__has_sse = __SSE_NO;
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
fesetexceptflag(const fexcept_t *flagp, int excepts)
|
|
{
|
|
fenv_t env;
|
|
int mxcsr;
|
|
|
|
__fnstenv(&env);
|
|
env.__status &= ~excepts;
|
|
env.__status |= *flagp & excepts;
|
|
__fldenv(env);
|
|
|
|
if (__HAS_SSE()) {
|
|
__stmxcsr(&mxcsr);
|
|
mxcsr &= ~excepts;
|
|
mxcsr |= *flagp & excepts;
|
|
__ldmxcsr(mxcsr);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
feraiseexcept(int excepts)
|
|
{
|
|
fexcept_t ex = excepts;
|
|
|
|
fesetexceptflag(&ex, excepts);
|
|
__fwait();
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
fegetenv(fenv_t *envp)
|
|
{
|
|
int control, mxcsr;
|
|
|
|
/*
|
|
* fnstenv masks all exceptions, so we need to save and
|
|
* restore the control word to avoid this side effect.
|
|
*/
|
|
__fnstcw(&control);
|
|
__fnstenv(envp);
|
|
if (__HAS_SSE()) {
|
|
__stmxcsr(&mxcsr);
|
|
__set_mxcsr(*envp, mxcsr);
|
|
}
|
|
__fldcw(control);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
feholdexcept(fenv_t *envp)
|
|
{
|
|
int mxcsr;
|
|
|
|
__fnstenv(envp);
|
|
__fnclex();
|
|
if (__HAS_SSE()) {
|
|
__stmxcsr(&mxcsr);
|
|
__set_mxcsr(*envp, mxcsr);
|
|
mxcsr &= ~FE_ALL_EXCEPT;
|
|
mxcsr |= FE_ALL_EXCEPT << _SSE_EMASK_SHIFT;
|
|
__ldmxcsr(mxcsr);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
feupdateenv(const fenv_t *envp)
|
|
{
|
|
int mxcsr, status;
|
|
|
|
__fnstsw(&status);
|
|
if (__HAS_SSE())
|
|
__stmxcsr(&mxcsr);
|
|
else
|
|
mxcsr = 0;
|
|
fesetenv(envp);
|
|
feraiseexcept((mxcsr | status) & FE_ALL_EXCEPT);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
__feenableexcept(int mask)
|
|
{
|
|
int mxcsr, control, omask;
|
|
|
|
mask &= FE_ALL_EXCEPT;
|
|
__fnstcw(&control);
|
|
if (__HAS_SSE())
|
|
__stmxcsr(&mxcsr);
|
|
else
|
|
mxcsr = 0;
|
|
omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
|
|
control &= ~mask;
|
|
__fldcw(control);
|
|
if (__HAS_SSE()) {
|
|
mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
|
|
__ldmxcsr(mxcsr);
|
|
}
|
|
return (~omask);
|
|
}
|
|
|
|
int
|
|
__fedisableexcept(int mask)
|
|
{
|
|
int mxcsr, control, omask;
|
|
|
|
mask &= FE_ALL_EXCEPT;
|
|
__fnstcw(&control);
|
|
if (__HAS_SSE())
|
|
__stmxcsr(&mxcsr);
|
|
else
|
|
mxcsr = 0;
|
|
omask = (control | mxcsr >> _SSE_EMASK_SHIFT) & FE_ALL_EXCEPT;
|
|
control |= mask;
|
|
__fldcw(control);
|
|
if (__HAS_SSE()) {
|
|
mxcsr |= mask << _SSE_EMASK_SHIFT;
|
|
__ldmxcsr(mxcsr);
|
|
}
|
|
return (~omask);
|
|
}
|
|
|
|
/*
 * Export the non-standard trap-control entry points under their
 * unprefixed names as weak aliases of the __-prefixed implementations.
 */
__weak_reference(__fedisableexcept, fedisableexcept);
__weak_reference(__feenableexcept, feenableexcept);
|