Add userland floating point emulator code for sparc64. This is a port

of the (never committed) in-kernel version (with some optimizations and
cleanups), which in turn was ported from NetBSD.
This commit is contained in:
Thomas Moestl 2002-02-23 21:37:18 +00:00
parent 649e717865
commit 4895e965c3
17 changed files with 3520 additions and 1 deletions

View File

@ -0,0 +1,6 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../libc/sparc64/fpu/
SRCS+= fpu.c fpu_add.c fpu_compare.c fpu_div.c fpu_explode.c fpu_implode.c \
fpu_mul.c fpu_reg.S fpu_sqrt.c fpu_subr.c

508
lib/libc/sparc64/fpu/fpu.c Normal file
View File

@ -0,0 +1,508 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*-
* Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* from: @(#)fpu.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp
*
* $FreeBSD$
*/
#include <sys/param.h>
#include "namespace.h"
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include "un-namespace.h"
#include "libc_private.h"
#include <machine/emul.h>
#include <machine/fp.h>
#include <machine/frame.h>
#include <machine/fsr.h>
#include <machine/instr.h>
#include <machine/pcb.h>
#include <machine/tstate.h>
#include "../sys/__sparc_utrap_private.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* Translate current exceptions into `first' exception. The
* bits go the wrong way for ffs() (0x10 is most important, etc).
* There are only 5, so do it the obvious way.
*/
#define X1(x) x
#define X2(x) x,x
#define X4(x) x,x,x,x
#define X8(x) X4(x),X4(x)
#define X16(x) X8(x),X8(x)
static char cx_to_trapx[] = {
X1(FSR_NX),
X2(FSR_DZ),
X4(FSR_UF),
X8(FSR_OF),
X16(FSR_NV)
};
#ifdef FPU_DEBUG
#ifdef FPU_DEBUG_MASK
int __fpe_debug = FPU_DEBUG_MASK;
#else
int __fpe_debug = 0;
#endif
#endif /* FPU_DEBUG */
static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, u_long);
static void utrap_write(char *);
static void utrap_kill_self(int);
/*
* System call wrappers usable in an utrap environment.
*/
static void
utrap_write(char *str)
{
int berrno;
berrno = errno;
__sys_write(STDERR_FILENO, str, strlen(str));
errno = berrno;
}
static void
utrap_kill_self(sig)
{
int berrno;
berrno = errno;
__sys_kill(__sys_getpid(), sig);
errno = berrno;
}
void
__fpu_panic(char *msg)
{
utrap_write(msg);
utrap_write("\n");
utrap_kill_self(SIGKILL);
}
/*
* Need to use an fpstate on the stack; we could switch, so we cannot safely
* modify the pcb one, it might get overwritten.
*/
void
__fpu_exception(struct utrapframe *uf)
{
struct fpemu fe;
u_long fsr, tstate;
u_int insn;
int rv;
fsr = uf->uf_fsr;
switch (FSR_GET_FTT(fsr)) {
case FSR_FTT_NONE:
utrap_write("lost FPU trap type\n");
return;
case FSR_FTT_IEEE:
goto fatal;
case FSR_FTT_SEQERR:
utrap_write("FPU sequence error\n");
goto fatal;
case FSR_FTT_HWERR:
utrap_write("FPU hardware error\n");
goto fatal;
case FSR_FTT_UNFIN:
case FSR_FTT_UNIMP:
break;
default:
utrap_write("unknown FPU error\n");
goto fatal;
}
fe.fe_fsr = fsr;
insn = *(u_int32_t *)uf->uf_pc;
if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
IF_F3_OP3(insn) != INS2_FPop2))
__fpu_panic("bogus FP fault");
tstate = uf->uf_state;
rv = __fpu_execute(uf, &fe, insn, tstate);
if (rv != 0)
utrap_kill_self(rv);
__asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
return;
fatal:
utrap_kill_self(SIGFPE);
return;
}
#ifdef FPU_DEBUG
/*
* Dump a `fpn' structure.
*/
void
__fpu_dumpfpn(struct fpn *fp)
{
static char *class[] = {
"SNAN", "QNAN", "ZERO", "NUM", "INF"
};
printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
fp->fp_sign ? '-' : ' ',
fp->fp_mant[0], fp->fp_mant[1],
fp->fp_mant[2], fp->fp_mant[3],
fp->fp_exp);
}
#endif
static u_long
fetch_reg(struct utrapframe *uf, int reg)
{
u_long offs;
struct frame *frm;
if (reg == IREG_G0)
return (0);
else if (reg < IREG_O0) /* global */
return (uf->uf_global[reg]);
else if (reg < IREG_L0) /* out */
return (uf->uf_out[reg - IREG_O0]);
else { /* local, in */
/*
* The in registers are immediately after the locals in
* the frame.
*/
frm = (struct frame *)(uf->uf_out[6] + SPOFF);
return (frm->f_local[reg - IREG_L0]);
}
__fpu_panic("fetch_reg: bogus register");
}
static void
__fpu_mov(struct fpemu *fe, int type, int rd, int rs1, int rs2)
{
int i;
i = 1 << type;
__fpu_setreg(rd++, rs1);
while (--i)
__fpu_setreg(rd++, __fpu_getreg(++rs2));
}
static __inline void
__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs1, int rs2,
u_int32_t insn, int fcc)
{
if (IF_F4_COND(insn) == fcc)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
}
static int
__fpu_cmpck(struct fpemu *fe)
{
int cx, fsr;
/*
* The only possible exception here is NV; catch it
* early and get out, as there is no result register.
*/
cx = fe->fe_cx;
fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
if (cx != 0) {
if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
FSR_FTT(FSR_FTT_IEEE);
return (SIGFPE);
}
fsr |= FSR_NV << FSR_AEXC_SHIFT;
}
fe->fe_fsr = fsr;
return (0);
}
static int opmask[] = {0, 0, 1, 3};
/*
* Helper for forming the below case statements. Build only the op3 and opf
* field of the instruction, these are the only that need to match.
*/
#define FOP(op3, opf) \
((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
/*
* Execute an FPU instruction (one that runs entirely in the FPU; not
* FBfcc or STF, for instance). On return, fe->fe_fs->fs_fsr will be
* modified to reflect the setting the hardware would have left.
*
* Note that we do not catch all illegal opcodes, so you can, for instance,
* multiply two integers this way.
*/
static int
__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long tstate)
{
struct fpn *fp;
int opf, rs1, rs2, rd, type, mask, cx, cond;
u_long reg, fsr;
u_int space[4];
/*
* `Decode' and execute instruction. Start with no exceptions.
* The type of any opf opcode is in the bottom two bits, so we
* squish them out here.
*/
opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
type = IF_F3_OPF(insn) & 3;
mask = opmask[type];
rs1 = IF_F3_RS1(insn) & ~mask;
rs2 = IF_F3_RS2(insn) & ~mask;
rd = IF_F3_RD(insn) & ~mask;
cond = 0;
#ifdef notdef
if ((rs1 | rs2 | rd) & mask)
return (SIGILL);
#endif
fsr = fe->fe_fsr;
fe->fe_fsr &= ~FSR_CEXC_MASK;
fe->fe_cx = 0;
switch (opf) {
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
FSR_GET_FCC0(fsr));
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
FSR_GET_FCC1(fsr));
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
FSR_GET_FCC2(fsr));
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
FSR_GET_FCC3(fsr));
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
(tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
(tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg == 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg <= 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg < 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg != 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg > 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
reg = fetch_reg(uf, IF_F4_RS1(insn));
if (reg >= 0)
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop2, INSFP2_FCMP):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
__fpu_compare(fe, 0);
return (__fpu_cmpck(fe));
case FOP(INS2_FPop2, INSFP2_FCMPE):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
__fpu_compare(fe, 1);
return (__fpu_cmpck(fe));
case FOP(INS2_FPop1, INSFP1_FMOV): /* these should all be pretty obvious */
__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
return (0);
case FOP(INS2_FPop1, INSFP1_FNEG):
__fpu_mov(fe, type, rd, __fpu_getreg(rs2) ^ (1 << 31), rs2);
return (0);
case FOP(INS2_FPop1, INSFP1_FABS):
__fpu_mov(fe, type, rd, __fpu_getreg(rs2) & ~(1 << 31), rs2);
return (0);
case FOP(INS2_FPop1, INSFP1_FSQRT):
__fpu_explode(fe, &fe->fe_f1, type, rs2);
fp = __fpu_sqrt(fe);
break;
case FOP(INS2_FPop1, INSFP1_FADD):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
fp = __fpu_add(fe);
break;
case FOP(INS2_FPop1, INSFP1_FSUB):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
fp = __fpu_sub(fe);
break;
case FOP(INS2_FPop1, INSFP1_FMUL):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
fp = __fpu_mul(fe);
break;
case FOP(INS2_FPop1, INSFP1_FDIV):
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
fp = __fpu_div(fe);
break;
case FOP(INS2_FPop1, INSFP1_FsMULd):
case FOP(INS2_FPop1, INSFP1_FdMULq):
if (type == FTYPE_EXT)
return (SIGILL);
__fpu_explode(fe, &fe->fe_f1, type, rs1);
__fpu_explode(fe, &fe->fe_f2, type, rs2);
type++; /* single to double, or double to quad */
/*
* Recalculate rd (the old type applied for the source regs
* only, the target one has a different size).
*/
mask = opmask[type];
rd = IF_F3_RD(insn) & ~mask;
fp = __fpu_mul(fe);
break;
case FOP(INS2_FPop1, INSFP1_FxTOs):
case FOP(INS2_FPop1, INSFP1_FxTOd):
case FOP(INS2_FPop1, INSFP1_FxTOq):
type = FTYPE_LNG;
__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
/* sneaky; depends on instruction encoding */
type = (IF_F3_OPF(insn) >> 2) & 3;
mask = opmask[type];
rd = IF_F3_RD(insn) & ~mask;
break;
case FOP(INS2_FPop1, INSFP1_FTOx):
__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
type = FTYPE_LNG;
mask = 1; /* needs 2 registers */
rd = IF_F3_RD(insn) & ~mask;
break;
case FOP(INS2_FPop1, INSFP1_FTOs):
case FOP(INS2_FPop1, INSFP1_FTOd):
case FOP(INS2_FPop1, INSFP1_FTOq):
case FOP(INS2_FPop1, INSFP1_FTOi):
__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
/* sneaky; depends on instruction encoding */
type = (IF_F3_OPF(insn) >> 2) & 3;
mask = opmask[type];
rd = IF_F3_RD(insn) & ~mask;
break;
default:
return (SIGILL);
}
/*
* ALU operation is complete. Collapse the result and then check
* for exceptions. If we got any, and they are enabled, do not
* alter the destination register, just stop with an exception.
* Otherwise set new current exceptions and accrue.
*/
__fpu_implode(fe, fp, type, space);
cx = fe->fe_cx;
if (cx != 0) {
mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
if (cx & mask) {
/* not accrued??? */
fsr = (fsr & ~FSR_FTT_MASK) |
FSR_FTT(FSR_FTT_IEEE) |
FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
return (SIGFPE);
}
fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
}
fe->fe_fsr = fsr;
__fpu_setreg(rd, space[0]);
if (type >= FTYPE_DBL || type == FTYPE_LNG) {
__fpu_setreg(rd + 1, space[1]);
if (type > FTYPE_DBL) {
__fpu_setreg(rd + 2, space[2]);
__fpu_setreg(rd + 3, space[3]);
}
}
return (0); /* success */
}

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_add.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_add.c,v 1.3 1996/03/14 19:41:52 christos Exp
*
* $FreeBSD$
*/
/*
* Perform an FPU add (return x + y).
*
* To subtract, negate y and call add.
*/
#include <sys/param.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include <machine/instr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
struct fpn *
__fpu_add(fe)
register struct fpemu *fe;
{
register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
register u_int r0, r1, r2, r3;
register int rd;
/*
* Put the `heavier' operand on the right (see fpu_emu.h).
* Then we will have one of the following cases, taken in the
* following order:
*
* - y = NaN. Implied: if only one is a signalling NaN, y is.
* The result is y.
* - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN
* case was taken care of earlier).
* If x = -y, the result is NaN. Otherwise the result
* is y (an Inf of whichever sign).
* - y is 0. Implied: x = 0.
* If x and y differ in sign (one positive, one negative),
* the result is +0 except when rounding to -Inf. If same:
* +0 + +0 = +0; -0 + -0 = -0.
* - x is 0. Implied: y != 0.
* Result is y.
* - other. Implied: both x and y are numbers.
* Do addition a la Hennessey & Patterson.
*/
ORDER(x, y);
if (ISNAN(y))
return (y);
if (ISINF(y)) {
if (ISINF(x) && x->fp_sign != y->fp_sign)
return (__fpu_newnan(fe));
return (y);
}
rd = FSR_GET_RD(fe->fe_fsr);
if (ISZERO(y)) {
if (rd != FSR_RD_NINF) /* only -0 + -0 gives -0 */
y->fp_sign &= x->fp_sign;
else /* any -0 operand gives -0 */
y->fp_sign |= x->fp_sign;
return (y);
}
if (ISZERO(x))
return (y);
/*
* We really have two numbers to add, although their signs may
* differ. Make the exponents match, by shifting the smaller
* number right (e.g., 1.011 => 0.1011) and increasing its
* exponent (2^3 => 2^4). Note that we do not alter the exponents
* of x and y here.
*/
r = &fe->fe_f3;
r->fp_class = FPC_NUM;
if (x->fp_exp == y->fp_exp) {
r->fp_exp = x->fp_exp;
r->fp_sticky = 0;
} else {
if (x->fp_exp < y->fp_exp) {
/*
* Try to avoid subtract case iii (see below).
* This also guarantees that x->fp_sticky = 0.
*/
SWAP(x, y);
}
/* now x->fp_exp > y->fp_exp */
r->fp_exp = x->fp_exp;
r->fp_sticky = __fpu_shr(y, x->fp_exp - y->fp_exp);
}
r->fp_sign = x->fp_sign;
if (x->fp_sign == y->fp_sign) {
FPU_DECL_CARRY
/*
* The signs match, so we simply add the numbers. The result
* may be `supernormal' (as big as 1.111...1 + 1.111...1, or
* 11.111...0). If so, a single bit shift-right will fix it
* (but remember to adjust the exponent).
*/
/* r->fp_mant = x->fp_mant + y->fp_mant */
FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]);
FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]);
FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]);
if ((r->fp_mant[0] = r0) >= FP_2) {
(void) __fpu_shr(r, 1);
r->fp_exp++;
}
} else {
FPU_DECL_CARRY
/*
* The signs differ, so things are rather more difficult.
* H&P would have us negate the negative operand and add;
* this is the same as subtracting the negative operand.
* This is quite a headache. Instead, we will subtract
* y from x, regardless of whether y itself is the negative
* operand. When this is done one of three conditions will
* hold, depending on the magnitudes of x and y:
* case i) |x| > |y|. The result is just x - y,
* with x's sign, but it may need to be normalized.
* case ii) |x| = |y|. The result is 0 (maybe -0)
* so must be fixed up.
* case iii) |x| < |y|. We goofed; the result should
* be (y - x), with the same sign as y.
* We could compare |x| and |y| here and avoid case iii,
* but that would take just as much work as the subtract.
* We can tell case iii has occurred by an overflow.
*
* N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0.
*/
/* r->fp_mant = x->fp_mant - y->fp_mant */
FPU_SET_CARRY(y->fp_sticky);
FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]);
FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]);
FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]);
FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]);
if (r0 < FP_2) {
/* cases i and ii */
if ((r0 | r1 | r2 | r3) == 0) {
/* case ii */
r->fp_class = FPC_ZERO;
r->fp_sign = rd == FSR_RD_NINF;
return (r);
}
} else {
/*
* Oops, case iii. This can only occur when the
* exponents were equal, in which case neither
* x nor y have sticky bits set. Flip the sign
* (to y's sign) and negate the result to get y - x.
*/
#ifdef DIAGNOSTIC
if (x->fp_exp != y->fp_exp || r->fp_sticky)
__fpu_panic("fpu_add");
#endif
r->fp_sign = y->fp_sign;
FPU_SUBS(r3, 0, r3);
FPU_SUBCS(r2, 0, r2);
FPU_SUBCS(r1, 0, r1);
FPU_SUBC(r0, 0, r0);
}
r->fp_mant[3] = r3;
r->fp_mant[2] = r2;
r->fp_mant[1] = r1;
r->fp_mant[0] = r0;
if (r0 < FP_1)
__fpu_norm(r);
}
return (r);
}

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)fpu_arith.h 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_arith.h,v 1.3 2000/07/24 04:11:03 mycroft
*
* $FreeBSD$
*/
/*
* Extended-precision arithmetic.
*
* We hold the notion of a `carry register', which may or may not be a
* machine carry bit or register. On the SPARC, it is just the machine's
* carry bit.
*
* In the worst case, you can compute the carry from x+y as
* (unsigned)(x + y) < (unsigned)x
* and from x+y+c as
* ((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0)
* for example.
*/
/* set up for extended-precision arithemtic */
#define FPU_DECL_CARRY
/*
* We have three kinds of add:
* add with carry: r = x + y + c
* add (ignoring current carry) and set carry: c'r = x + y + 0
* add with carry and set carry: c'r = x + y + c
* The macros use `C' for `use carry' and `S' for `set carry'.
* Note that the state of the carry is undefined after ADDC and SUBC,
* so if all you have for these is `add with carry and set carry',
* that is OK.
*
* The same goes for subtract, except that we compute x - y - c.
*
* Finally, we have a way to get the carry into a `regular' variable,
* or set it from a value. SET_CARRY turns 0 into no-carry, nonzero
* into carry; GET_CARRY sets its argument to 0 or 1.
*/
#define FPU_ADDC(r, x, y) \
__asm __volatile("addx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_ADDS(r, x, y) \
__asm __volatile("addcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_ADDCS(r, x, y) \
__asm __volatile("addxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_SUBC(r, x, y) \
__asm __volatile("subx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_SUBS(r, x, y) \
__asm __volatile("subcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_SUBCS(r, x, y) \
__asm __volatile("subxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
#define FPU_GET_CARRY(r) __asm __volatile("addx %%g0,%%g0,%0" : "=r"(r))
#define FPU_SET_CARRY(v) __asm __volatile("addcc %0,-1,%%g0" : : "r"(v))
#define FPU_SHL1_BY_ADD /* shift left 1 faster by ADDC than (a<<1)|(b>>31) */

View File

@ -0,0 +1,164 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_compare.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_compare.c,v 1.3 2001/08/26 05:46:31 eeh Exp
*
* $FreeBSD$
*/
/*
* CMP and CMPE instructions.
*
* These rely on the fact that our internal wide format is achieved by
* adding zero bits to the end of narrower mantissas.
*/
#include <sys/types.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* Perform a compare instruction (with or without unordered exception).
* This updates the fcc field in the fsr.
*
* If either operand is NaN, the result is unordered. For cmpe, this
* causes an NV exception. Everything else is ordered:
* |Inf| > |numbers| > |0|.
* We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0),
* so we get this directly. Note, however, that two zeros compare equal
* regardless of sign, while everything else depends on sign.
*
* Incidentally, two Infs of the same sign compare equal (per the 80387
* manual---it would be nice if the SPARC documentation were more
* complete).
*/
void
__fpu_compare(struct fpemu *fe, int cmpe)
{
register struct fpn *a, *b;
register int cc;
FPU_DECL_CARRY
a = &fe->fe_f1;
b = &fe->fe_f2;
if (ISNAN(a) || ISNAN(b)) {
/*
* In any case, we already got an exception for signalling
* NaNs; here we may replace that one with an identical
* exception, but so what?.
*/
if (cmpe)
fe->fe_cx = FSR_NV;
cc = FSR_CC_UO;
goto done;
}
/*
* Must handle both-zero early to avoid sign goofs. Otherwise,
* at most one is 0, and if the signs differ we are done.
*/
if (ISZERO(a) && ISZERO(b)) {
cc = FSR_CC_EQ;
goto done;
}
if (a->fp_sign) { /* a < 0 (or -0) */
if (!b->fp_sign) { /* b >= 0 (or if a = -0, b > 0) */
cc = FSR_CC_LT;
goto done;
}
} else { /* a > 0 (or +0) */
if (b->fp_sign) { /* b <= -0 (or if a = +0, b < 0) */
cc = FSR_CC_GT;
goto done;
}
}
/*
* Now the signs are the same (but may both be negative). All
* we have left are these cases:
*
* |a| < |b| [classes or values differ]
* |a| > |b| [classes or values differ]
* |a| == |b| [classes and values identical]
*
* We define `diff' here to expand these as:
*
* |a| < |b|, a,b >= 0: a < b => FSR_CC_LT
* |a| < |b|, a,b < 0: a > b => FSR_CC_GT
* |a| > |b|, a,b >= 0: a > b => FSR_CC_GT
* |a| > |b|, a,b < 0: a < b => FSR_CC_LT
*/
#define opposite_cc(cc) ((cc) == FSR_CC_LT ? FSR_CC_GT : FSR_CC_LT)
#define diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) : (magnitude))
if (a->fp_class < b->fp_class) { /* |a| < |b| */
cc = diff(FSR_CC_LT);
goto done;
}
if (a->fp_class > b->fp_class) { /* |a| > |b| */
cc = diff(FSR_CC_GT);
goto done;
}
/* now none can be 0: only Inf and numbers remain */
if (ISINF(a)) { /* |Inf| = |Inf| */
cc = FSR_CC_EQ;
goto done;
}
/*
* Only numbers remain. To compare two numbers in magnitude, we
* simply subtract them.
*/
a = __fpu_sub(fe);
if (a->fp_class == FPC_ZERO)
cc = FSR_CC_EQ;
else
cc = diff(FSR_CC_GT);
done:
fe->fe_fsr = (fe->fe_fsr & ~FSR_FCC0_MASK) | FSR_FCC0(cc);
}

View File

@ -0,0 +1,271 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_div.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_div.c,v 1.2 1994/11/20 20:52:38 deraadt Exp
*
* $FreeBSD$
*/
/*
* Perform an FPU divide (return x / y).
*/
#include <sys/types.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* Division of normal numbers is done as follows:
*
* x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e.
* If X and Y are the mantissas (1.bbbb's), the quotient is then:
*
* q = (X / Y) * 2^((x exponent) - (y exponent))
*
* Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y)
* will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only
* if X < Y. In that case, it will have to be shifted left one bit to
* become a normal number, and the exponent decremented. Thus, the
* desired exponent is:
*
* left_shift = x->fp_mant < y->fp_mant;
* result_exp = x->fp_exp - y->fp_exp - left_shift;
*
* The quotient mantissa X/Y can then be computed one bit at a time
* using the following algorithm:
*
* Q = 0; -- Initial quotient.
* R = X; -- Initial remainder,
* if (left_shift) -- but fixed up in advance.
* R *= 2;
* for (bit = FP_NMANT; --bit >= 0; R *= 2) {
* if (R >= Y) {
* Q |= 1 << bit;
* R -= Y;
* }
* }
*
* The subtraction R -= Y always removes the uppermost bit from R (and
* can sometimes remove additional lower-order 1 bits); this proof is
* left to the reader.
*
* This loop correctly calculates the guard and round bits since they are
* included in the expanded internal representation. The sticky bit
* is to be set if and only if any other bits beyond guard and round
* would be set. From the above it is obvious that this is true if and
* only if the remainder R is nonzero when the loop terminates.
*
* Examining the loop above, we can see that the quotient Q is built
* one bit at a time ``from the top down''. This means that we can
* dispense with the multi-word arithmetic and just build it one word
* at a time, writing each result word when it is done.
*
* Furthermore, since X and Y are both in [1.0,2.0), we know that,
* initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and
* is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1
* set, and R can be set initially to either X - Y (when X >= Y) or
* 2X - Y (when X < Y). In addition, comparing R and Y is difficult,
* so we will simply calculate R - Y and see if that underflows.
* This leads to the following revised version of the algorithm:
*
* R = X;
* bit = FP_1;
* D = R - Y;
* if (D >= 0) {
* result_exp = x->fp_exp - y->fp_exp;
* R = D;
* q = bit;
* bit >>= 1;
* } else {
* result_exp = x->fp_exp - y->fp_exp - 1;
* q = 0;
* }
* R <<= 1;
* do {
* D = R - Y;
* if (D >= 0) {
* q |= bit;
* R = D;
* }
* R <<= 1;
* } while ((bit >>= 1) != 0);
* Q[0] = q;
* for (i = 1; i < 4; i++) {
* q = 0, bit = 1 << 31;
* do {
* D = R - Y;
* if (D >= 0) {
* q |= bit;
* R = D;
* }
* R <<= 1;
* } while ((bit >>= 1) != 0);
* Q[i] = q;
* }
*
* This can be refined just a bit further by moving the `R <<= 1'
* calculations to the front of the do-loops and eliding the first one.
* The process can be terminated immediately whenever R becomes 0, but
* this is relatively rare, and we do not bother.
*/
struct fpn *
__fpu_div(fe)
register struct fpemu *fe;
{
register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
register u_int q, bit;
register u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3;
FPU_DECL_CARRY
/*
* Since divide is not commutative, we cannot just use ORDER.
* Check either operand for NaN first; if there is at least one,
* order the signalling one (if only one) onto the right, then
* return it. Otherwise we have the following cases:
*
* Inf / Inf = NaN, plus NV exception
* Inf / num = Inf [i.e., return x]
* Inf / 0 = Inf [i.e., return x]
* 0 / Inf = 0 [i.e., return x]
* 0 / num = 0 [i.e., return x]
* 0 / 0 = NaN, plus NV exception
* num / Inf = 0
* num / num = num (do the divide)
* num / 0 = Inf, plus DZ exception
*/
if (ISNAN(x) || ISNAN(y)) {
ORDER(x, y);
return (y);
}
if (ISINF(x) || ISZERO(x)) {
if (x->fp_class == y->fp_class)
return (__fpu_newnan(fe));
return (x);
}
/* all results at this point use XOR of operand signs */
x->fp_sign ^= y->fp_sign;
if (ISINF(y)) {
x->fp_class = FPC_ZERO;
return (x);
}
if (ISZERO(y)) {
fe->fe_cx = FSR_DZ;
x->fp_class = FPC_INF;
return (x);
}
/*
* Macros for the divide. See comments at top for algorithm.
* Note that we expand R, D, and Y here.
*/
#define SUBTRACT /* D = R - Y */ \
FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \
FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0)
#define NONNEGATIVE /* D >= 0 */ \
((int)d0 >= 0)
#ifdef FPU_SHL1_BY_ADD
#define SHL1 /* R <<= 1 */ \
FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \
FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0)
#else
#define SHL1 \
r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \
r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1
#endif
#define LOOP /* do ... while (bit >>= 1) */ \
do { \
SHL1; \
SUBTRACT; \
if (NONNEGATIVE) { \
q |= bit; \
r0 = d0, r1 = d1, r2 = d2, r3 = d3; \
} \
} while ((bit >>= 1) != 0)
#define WORD(r, i) /* calculate r->fp_mant[i] */ \
q = 0; \
bit = 1 << 31; \
LOOP; \
(x)->fp_mant[i] = q
/* Setup. Note that we put our result in x. */
r0 = x->fp_mant[0];
r1 = x->fp_mant[1];
r2 = x->fp_mant[2];
r3 = x->fp_mant[3];
y0 = y->fp_mant[0];
y1 = y->fp_mant[1];
y2 = y->fp_mant[2];
y3 = y->fp_mant[3];
bit = FP_1;
SUBTRACT;
if (NONNEGATIVE) {
x->fp_exp -= y->fp_exp;
r0 = d0, r1 = d1, r2 = d2, r3 = d3;
q = bit;
bit >>= 1;
} else {
x->fp_exp -= y->fp_exp + 1;
q = 0;
}
LOOP;
x->fp_mant[0] = q;
WORD(x, 1);
WORD(x, 2);
WORD(x, 3);
x->fp_sticky = r0 | r1 | r2 | r3;
return (x);
}

View File

@ -0,0 +1,185 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)fpu_emu.h 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_emu.h,v 1.4 2000/08/03 18:32:07 eeh
*
* $FreeBSD$
*/
/*
* Floating point emulator (tailored for SPARC, but structurally
* machine-independent).
*
* Floating point numbers are carried around internally in an `expanded'
* or `unpacked' form consisting of:
* - sign
* - unbiased exponent
* - mantissa (`1.' + 112-bit fraction + guard + round)
* - sticky bit
* Any implied `1' bit is inserted, giving a 113-bit mantissa that is
* always nonzero. Additional low-order `guard' and `round' bits are
* scrunched in, making the entire mantissa 115 bits long. This is divided
* into four 32-bit words, with `spare' bits left over in the upper part
* of the top word (the high bits of fp_mant[0]). An internal `exploded'
* number is thus kept within the half-open interval [1.0,2.0) (but see
* the `number classes' below). This holds even for denormalized numbers:
* when we explode an external denorm, we normalize it, introducing low-order
* zero bits, so that the rest of the code always sees normalized values.
*
* Note that a number of our algorithms use the `spare' bits at the top.
* The most demanding algorithm---the one for sqrt---depends on two such
* bits, so that it can represent values up to (but not including) 8.0,
* and then it needs a carry on top of that, so that we need three `spares'.
*
* The sticky-word is 32 bits so that we can use `OR' operators to goosh
* whole words from the mantissa into it.
*
* All operations are done in this internal extended precision. According
* to Hennesey & Patterson, Appendix A, rounding can be repeated---that is,
* it is OK to do a+b in extended precision and then round the result to
* single precision---provided single, double, and extended precisions are
* `far enough apart' (they always are), but we will try to avoid any such
* extra work where possible.
*/
#ifndef _SPARC64_FPU_FPU_EMU_H_
#define _SPARC64_FPU_FPU_EMU_H_
#include "fpu_reg.h"
struct fpn {
int fp_class; /* see below */
int fp_sign; /* 0 => positive, 1 => negative */
int fp_exp; /* exponent (unbiased) */
int fp_sticky; /* nonzero bits lost at right end */
u_int fp_mant[4]; /* 115-bit mantissa */
};
#define FP_NMANT 115 /* total bits in mantissa (incl g,r) */
#define FP_NG 2 /* number of low-order guard bits */
#define FP_LG ((FP_NMANT - 1) & 31) /* log2(1.0) for fp_mant[0] */
#define FP_LG2 ((FP_NMANT - 1) & 63) /* log2(1.0) for fp_mant[0] and fp_mant[1] */
#define FP_QUIETBIT (1 << (FP_LG - 1)) /* Quiet bit in NaNs (0.5) */
#define FP_1 (1 << FP_LG) /* 1.0 in fp_mant[0] */
#define FP_2 (1 << (FP_LG + 1)) /* 2.0 in fp_mant[0] */
/*
* Number classes. Since zero, Inf, and NaN cannot be represented using
* the above layout, we distinguish these from other numbers via a class.
* In addition, to make computation easier and to follow Appendix N of
* the SPARC Version 8 standard, we give each kind of NaN a separate class.
*/
#define FPC_SNAN -2 /* signalling NaN (sign irrelevant) */
#define FPC_QNAN -1 /* quiet NaN (sign irrelevant) */
#define FPC_ZERO 0 /* zero (sign matters) */
#define FPC_NUM 1 /* number (sign matters) */
#define FPC_INF 2 /* infinity (sign matters) */
#define ISNAN(fp) ((fp)->fp_class < 0)
#define ISZERO(fp) ((fp)->fp_class == 0)
#define ISINF(fp) ((fp)->fp_class == FPC_INF)
/*
* ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points
* to the `more significant' operand for our purposes. Appendix N says that
* the result of a computation involving two numbers are:
*
* If both are SNaN: operand 2, converted to Quiet
* If only one is SNaN: the SNaN operand, converted to Quiet
* If both are QNaN: operand 2
* If only one is QNaN: the QNaN operand
*
* In addition, in operations with an Inf operand, the result is usually
* Inf. The class numbers are carefully arranged so that if
* (unsigned)class(op1) > (unsigned)class(op2)
* then op1 is the one we want; otherwise op2 is the one we want.
*/
#define ORDER(x, y) { \
if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \
SWAP(x, y); \
}
#define SWAP(x, y) { \
register struct fpn *swap; \
swap = (x), (x) = (y), (y) = swap; \
}
/*
* Floating point operand types. FTYPE_LNG is syntethic (it does not occur in
* instructions).
*/
#define FTYPE_INT INSFP_i
#define FTYPE_SNG INSFP_s
#define FTYPE_DBL INSFP_d
#define FTYPE_EXT INSFP_q
#define FTYPE_LNG -1
/*
* Emulator state.
*/
struct fpemu {
int fe_fsr; /* fsr copy (modified during op) */
int fe_cx; /* exceptions */
struct fpn fe_f1; /* operand 1 */
struct fpn fe_f2; /* operand 2, if required */
struct fpn fe_f3; /* available storage for result */
};
/*
* Arithmetic functions.
* Each of these may modify its inputs (f1,f2) and/or the temporary.
* Each returns a pointer to the result and/or sets exceptions.
*/
#define __fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, __fpu_add(fe))
#ifdef FPU_DEBUG
#define FPE_INSN 0x1
#define FPE_REG 0x2
extern int __fpe_debug;
void __fpu_dumpfpn(struct fpn *);
#define DPRINTF(x, y) if (__fpe_debug & (x)) printf y
#define DUMPFPN(x, f) if (__fpe_debug & (x)) __fpu_dumpfpn((f))
#else
#define DPRINTF(x, y)
#define DUMPFPN(x, f)
#endif
#endif /* !_SPARC64_FPU_FPU_EXTERN_H_ */

View File

@ -0,0 +1,304 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_explode.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_explode.c,v 1.5 2000/08/03 18:32:08 eeh Exp
*
* $FreeBSD$
*/
/*
* FPU subroutines: `explode' the machine's `packed binary' format numbers
* into our internal format.
*/
#include <sys/param.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include <machine/ieee.h>
#include <machine/instr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* N.B.: in all of the following, we assume the FP format is
*
* ---------------------------
* | s | exponent | fraction |
* ---------------------------
*
* (which represents -1**s * 1.fraction * 2**exponent), so that the
* sign bit is way at the top (bit 31), the exponent is next, and
* then the remaining bits mark the fraction. A zero exponent means
* zero or denormalized (0.fraction rather than 1.fraction), and the
* maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN.
*
* Since the sign bit is always the topmost bit---this holds even for
* integers---we set that outside all the *tof functions. Each function
* returns the class code for the new number (but note that we use
* FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate).
*/
/*
* int -> fpn.
*/
int
__fpu_itof(fp, i)
register struct fpn *fp;
register u_int i;
{
if (i == 0)
return (FPC_ZERO);
/*
* The value FP_1 represents 2^FP_LG, so set the exponent
* there and let normalization fix it up. Convert negative
* numbers to sign-and-magnitude. Note that this relies on
* fpu_norm()'s handling of `supernormals'; see fpu_subr.c.
*/
fp->fp_exp = FP_LG;
fp->fp_mant[0] = (int)i < 0 ? -i : i;
fp->fp_mant[1] = 0;
fp->fp_mant[2] = 0;
fp->fp_mant[3] = 0;
__fpu_norm(fp);
return (FPC_NUM);
}
/*
* 64-bit int -> fpn.
*/
int
__fpu_xtof(fp, i)
register struct fpn *fp;
register u_int64_t i;
{
if (i == 0)
return (FPC_ZERO);
/*
* The value FP_1 represents 2^FP_LG, so set the exponent
* there and let normalization fix it up. Convert negative
* numbers to sign-and-magnitude. Note that this relies on
* fpu_norm()'s handling of `supernormals'; see fpu_subr.c.
*/
fp->fp_exp = FP_LG2;
*((int64_t*)fp->fp_mant) = (int64_t)i < 0 ? -i : i;
fp->fp_mant[2] = 0;
fp->fp_mant[3] = 0;
__fpu_norm(fp);
return (FPC_NUM);
}
#define mask(nbits) ((1L << (nbits)) - 1)
/*
* All external floating formats convert to internal in the same manner,
* as defined here. Note that only normals get an implied 1.0 inserted.
*/
#define FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \
if (exp == 0) { \
if (allfrac == 0) \
return (FPC_ZERO); \
fp->fp_exp = 1 - expbias; \
fp->fp_mant[0] = f0; \
fp->fp_mant[1] = f1; \
fp->fp_mant[2] = f2; \
fp->fp_mant[3] = f3; \
__fpu_norm(fp); \
return (FPC_NUM); \
} \
if (exp == (2 * expbias + 1)) { \
if (allfrac == 0) \
return (FPC_INF); \
fp->fp_mant[0] = f0; \
fp->fp_mant[1] = f1; \
fp->fp_mant[2] = f2; \
fp->fp_mant[3] = f3; \
return (FPC_QNAN); \
} \
fp->fp_exp = exp - expbias; \
fp->fp_mant[0] = FP_1 | f0; \
fp->fp_mant[1] = f1; \
fp->fp_mant[2] = f2; \
fp->fp_mant[3] = f3; \
return (FPC_NUM)
/*
* 32-bit single precision -> fpn.
* We assume a single occupies at most (64-FP_LG) bits in the internal
* format: i.e., needs at most fp_mant[0] and fp_mant[1].
*/
int
__fpu_stof(fp, i)
register struct fpn *fp;
register u_int i;
{
register int exp;
register u_int frac, f0, f1;
#define SNG_SHIFT (SNG_FRACBITS - FP_LG)
exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS);
frac = i & mask(SNG_FRACBITS);
f0 = frac >> SNG_SHIFT;
f1 = frac << (32 - SNG_SHIFT);
FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0);
}
/*
* 64-bit double -> fpn.
* We assume this uses at most (96-FP_LG) bits.
*/
int
__fpu_dtof(fp, i, j)
register struct fpn *fp;
register u_int i, j;
{
register int exp;
register u_int frac, f0, f1, f2;
#define DBL_SHIFT (DBL_FRACBITS - 32 - FP_LG)
exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS);
frac = i & mask(DBL_FRACBITS - 32);
f0 = frac >> DBL_SHIFT;
f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT);
f2 = j << (32 - DBL_SHIFT);
frac |= j;
FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0);
}
/*
* 128-bit extended -> fpn.
*/
int
__fpu_qtof(fp, i, j, k, l)
register struct fpn *fp;
register u_int i, j, k, l;
{
register int exp;
register u_int frac, f0, f1, f2, f3;
#define EXT_SHIFT (-(EXT_FRACBITS - 3 * 32 - FP_LG)) /* left shift! */
/*
* Note that ext and fpn `line up', hence no shifting needed.
*/
exp = (i >> (32 - 1 - EXT_EXPBITS)) & mask(EXT_EXPBITS);
frac = i & mask(EXT_FRACBITS - 3 * 32);
f0 = (frac << EXT_SHIFT) | (j >> (32 - EXT_SHIFT));
f1 = (j << EXT_SHIFT) | (k >> (32 - EXT_SHIFT));
f2 = (k << EXT_SHIFT) | (l >> (32 - EXT_SHIFT));
f3 = l << EXT_SHIFT;
frac |= j | k | l;
FP_TOF(exp, EXT_EXP_BIAS, frac, f0, f1, f2, f3);
}
/*
* Explode the contents of a register / regpair / regquad.
* If the input is a signalling NaN, an NV (invalid) exception
* will be set. (Note that nothing but NV can occur until ALU
* operations are performed.)
*/
void
__fpu_explode(fe, fp, type, reg)
struct fpemu *fe;
struct fpn *fp;
int type, reg;
{
u_int s;
u_int64_t l;
l = __fpu_getreg64(reg & ~1);
s = __fpu_getreg(reg);
fp->fp_sign = s >> 31;
fp->fp_sticky = 0;
switch (type) {
case FTYPE_LNG:
s = __fpu_xtof(fp, l);
break;
case FTYPE_INT:
s = __fpu_itof(fp, s);
break;
case FTYPE_SNG:
s = __fpu_stof(fp, s);
break;
case FTYPE_DBL:
s = __fpu_dtof(fp, s, __fpu_getreg(reg + 1));
break;
case FTYPE_EXT:
s = __fpu_qtof(fp, s, __fpu_getreg(reg + 1),
__fpu_getreg(reg + 2),
__fpu_getreg(reg + 3));
break;
default:
__fpu_panic("fpu_explode");
}
if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
/*
* Input is a signalling NaN. All operations that return
* an input NaN operand put it through a ``NaN conversion'',
* which basically just means ``turn on the quiet bit''.
* We do this here so that all NaNs internally look quiet
* (we can tell signalling ones by their class).
*/
fp->fp_mant[0] |= FP_QUIETBIT;
fe->fe_cx = FSR_NV; /* assert invalid operand */
s = FPC_SNAN;
}
fp->fp_class = s;
DPRINTF(FPE_REG, ("fpu_explode: %%%c%d => ", (type == FTYPE_LNG) ? 'x' :
((type == FTYPE_INT) ? 'i' :
((type == FTYPE_SNG) ? 's' :
((type == FTYPE_DBL) ? 'd' :
((type == FTYPE_EXT) ? 'q' : '?')))),
reg));
DUMPFPN(FPE_REG, fp);
DPRINTF(FPE_REG, ("\n"));
}

View File

@ -0,0 +1,97 @@
/*-
* Copyright (c) 1995 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Christos Zoulas.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the NetBSD
* Foundation, Inc. and its contributors.
* 4. Neither the name of The NetBSD Foundation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* from: NetBSD: fpu_extern.h,v 1.4 2000/08/03 18:32:08 eeh Exp
*
* $FreeBSD$
*/
#ifndef _SPARC64_FPU_FPU_EXTERN_H_
#define _SPARC64_FPU_FPU_EXTERN_H_
struct proc;
struct fpstate;
struct utrapframe;
union instr;
struct fpemu;
struct fpn;
/* fpu.c */
void __fpu_exception __P((struct utrapframe *tf));
void __fpu_panic __P((char *msg));
/* fpu_add.c */
struct fpn *__fpu_add __P((struct fpemu *));
/* fpu_compare.c */
void __fpu_compare __P((struct fpemu *, int));
/* fpu_div.c */
struct fpn *__fpu_div __P((struct fpemu *));
/* fpu_explode.c */
int __fpu_itof __P((struct fpn *, u_int));
int __fpu_xtof __P((struct fpn *, u_int64_t));
int __fpu_stof __P((struct fpn *, u_int));
int __fpu_dtof __P((struct fpn *, u_int, u_int ));
int __fpu_qtof __P((struct fpn *, u_int, u_int , u_int , u_int ));
void __fpu_explode __P((struct fpemu *, struct fpn *, int, int ));
/* fpu_implode.c */
u_int __fpu_ftoi __P((struct fpemu *, struct fpn *));
u_int __fpu_ftox __P((struct fpemu *, struct fpn *, u_int *));
u_int __fpu_ftos __P((struct fpemu *, struct fpn *));
u_int __fpu_ftod __P((struct fpemu *, struct fpn *, u_int *));
u_int __fpu_ftoq __P((struct fpemu *, struct fpn *, u_int *));
void __fpu_implode __P((struct fpemu *, struct fpn *, int, u_int *));
/* fpu_mul.c */
struct fpn *__fpu_mul __P((struct fpemu *));
/* fpu_sqrt.c */
struct fpn *__fpu_sqrt __P((struct fpemu *));
/* fpu_subr.c */
/*
* Shift a number right some number of bits, taking care of round/sticky.
* Note that the result is probably not a well-formed number (it will lack
* the normal 1-bit mant[0]&FP_1).
*/
int __fpu_shr __P((register struct fpn *, register int));
void __fpu_norm __P((register struct fpn *));
/* Build a new Quiet NaN (sign=0, frac=all 1's). */
struct fpn *__fpu_newnan __P((register struct fpemu *));
#endif /* !_SPARC64_FPU_FPU_EXTERN_H_ */

View File

@ -0,0 +1,535 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_implode.c,v 1.8 2001/08/26 05:44:46 eeh Exp
*
* $FreeBSD$
*/
/*
* FPU subroutines: `implode' internal format numbers into the machine's
* `packed binary' format.
*/
#include <sys/param.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include <machine/ieee.h>
#include <machine/instr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
static int round __P((register struct fpemu *, register struct fpn *));
static int toinf __P((struct fpemu *, int));
/*
* Round a number (algorithm from Motorola MC68882 manual, modified for
* our internal format). Set inexact exception if rounding is required.
* Return true iff we rounded up.
*
* After rounding, we discard the guard and round bits by shifting right
* 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky).
* This saves effort later.
*
* Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's
* responsibility to fix this if necessary.
*/
static int
round(register struct fpemu *fe, register struct fpn *fp)
{
register u_int m0, m1, m2, m3;
register int gr, s;
m0 = fp->fp_mant[0];
m1 = fp->fp_mant[1];
m2 = fp->fp_mant[2];
m3 = fp->fp_mant[3];
gr = m3 & 3;
s = fp->fp_sticky;
/* mant >>= FP_NG */
m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
m0 >>= FP_NG;
if ((gr | s) == 0) /* result is exact: no rounding needed */
goto rounddown;
fe->fe_cx |= FSR_NX; /* inexact */
/* Go to rounddown to round down; break to round up. */
switch (FSR_GET_RD(fe->fe_fsr)) {
case FSR_RD_N:
default:
/*
* Round only if guard is set (gr & 2). If guard is set,
* but round & sticky both clear, then we want to round
* but have a tie, so round to even, i.e., add 1 iff odd.
*/
if ((gr & 2) == 0)
goto rounddown;
if ((gr & 1) || fp->fp_sticky || (m3 & 1))
break;
goto rounddown;
case FSR_RD_Z:
/* Round towards zero, i.e., down. */
goto rounddown;
case FSR_RD_NINF:
/* Round towards -Inf: up if negative, down if positive. */
if (fp->fp_sign)
break;
goto rounddown;
case FSR_RD_PINF:
/* Round towards +Inf: up if positive, down otherwise. */
if (!fp->fp_sign)
break;
goto rounddown;
}
/* Bump low bit of mantissa, with carry. */
FPU_ADDS(m3, m3, 1);
FPU_ADDCS(m2, m2, 0);
FPU_ADDCS(m1, m1, 0);
FPU_ADDC(m0, m0, 0);
fp->fp_mant[0] = m0;
fp->fp_mant[1] = m1;
fp->fp_mant[2] = m2;
fp->fp_mant[3] = m3;
return (1);
rounddown:
fp->fp_mant[0] = m0;
fp->fp_mant[1] = m1;
fp->fp_mant[2] = m2;
fp->fp_mant[3] = m3;
return (0);
}
/*
* For overflow: return true if overflow is to go to +/-Inf, according
* to the sign of the overflowing result. If false, overflow is to go
* to the largest magnitude value instead.
*/
static int
toinf(struct fpemu *fe, int sign)
{
int inf;
/* look at rounding direction */
switch (FSR_GET_RD(fe->fe_fsr)) {
default:
case FSR_RD_N: /* the nearest value is always Inf */
inf = 1;
break;
case FSR_RD_Z: /* toward 0 => never towards Inf */
inf = 0;
break;
case FSR_RD_PINF: /* toward +Inf iff positive */
inf = sign == 0;
break;
case FSR_RD_NINF: /* toward -Inf iff negative */
inf = sign;
break;
}
return (inf);
}
/*
* fpn -> int (int value returned as return value).
*
* N.B.: this conversion always rounds towards zero (this is a peculiarity
* of the SPARC instruction set).
*/
u_int
__fpu_ftoi(fe, fp)
struct fpemu *fe;
register struct fpn *fp;
{
register u_int i;
register int sign, exp;
sign = fp->fp_sign;
switch (fp->fp_class) {
case FPC_ZERO:
return (0);
case FPC_NUM:
/*
* If exp >= 2^32, overflow. Otherwise shift value right
* into last mantissa word (this will not exceed 0xffffffff),
* shifting any guard and round bits out into the sticky
* bit. Then ``round'' towards zero, i.e., just set an
* inexact exception if sticky is set (see round()).
* If the result is > 0x80000000, or is positive and equals
* 0x80000000, overflow; otherwise the last fraction word
* is the result.
*/
if ((exp = fp->fp_exp) >= 32)
break;
/* NB: the following includes exp < 0 cases */
if (__fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
fe->fe_cx |= FSR_NX;
i = fp->fp_mant[3];
if (i >= ((u_int)0x80000000 + sign))
break;
return (sign ? -i : i);
default: /* Inf, qNaN, sNaN */
break;
}
/* overflow: replace any inexact exception with invalid */
fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
return (0x7fffffff + sign);
}
/*
* fpn -> extended int (high bits of int value returned as return value).
*
* N.B.: this conversion always rounds towards zero (this is a peculiarity
* of the SPARC instruction set).
*/
u_int
__fpu_ftox(fe, fp, res)
struct fpemu *fe;
register struct fpn *fp;
u_int *res;
{
register u_int64_t i;
register int sign, exp;
sign = fp->fp_sign;
switch (fp->fp_class) {
case FPC_ZERO:
res[1] = 0;
return (0);
case FPC_NUM:
/*
* If exp >= 2^64, overflow. Otherwise shift value right
* into last mantissa word (this will not exceed 0xffffffffffffffff),
* shifting any guard and round bits out into the sticky
* bit. Then ``round'' towards zero, i.e., just set an
* inexact exception if sticky is set (see round()).
* If the result is > 0x8000000000000000, or is positive and equals
* 0x8000000000000000, overflow; otherwise the last fraction word
* is the result.
*/
if ((exp = fp->fp_exp) >= 64)
break;
/* NB: the following includes exp < 0 cases */
if (__fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
fe->fe_cx |= FSR_NX;
i = ((u_int64_t)fp->fp_mant[2]<<32)|fp->fp_mant[3];
if (i >= ((u_int64_t)0x8000000000000000LL + sign))
break;
if (sign)
i = -1;
res[1] = (int)i;
return (i >> 32);
default: /* Inf, qNaN, sNaN */
break;
}
/* overflow: replace any inexact exception with invalid */
fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
return (0x7fffffffffffffffLL + sign);
}
/*
* fpn -> single (32 bit single returned as return value).
* We assume <= 29 bits in a single-precision fraction (1.f part).
*/
u_int
__fpu_ftos(fe, fp)
struct fpemu *fe;
register struct fpn *fp;
{
register u_int sign = fp->fp_sign << 31;
register int exp;
#define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */
#define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */
/* Take care of non-numbers first. */
if (ISNAN(fp)) {
/*
* Preserve upper bits of NaN, per SPARC V8 appendix N.
* Note that fp->fp_mant[0] has the quiet bit set,
* even if it is classified as a signalling NaN.
*/
(void) __fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS);
exp = SNG_EXP_INFNAN;
goto done;
}
if (ISINF(fp))
return (sign | SNG_EXP(SNG_EXP_INFNAN));
if (ISZERO(fp))
return (sign);
/*
* Normals (including subnormals). Drop all the fraction bits
* (including the explicit ``implied'' 1 bit) down into the
* single-precision range. If the number is subnormal, move
* the ``implied'' 1 into the explicit range as well, and shift
* right to introduce leading zeroes. Rounding then acts
* differently for normals and subnormals: the largest subnormal
* may round to the smallest normal (1.0 x 2^minexp), or may
* remain subnormal. In the latter case, signal an underflow
* if the result was inexact or if underflow traps are enabled.
*
* Rounding a normal, on the other hand, always produces another
* normal (although either way the result might be too big for
* single precision, and cause an overflow). If rounding a
* normal produces 2.0 in the fraction, we need not adjust that
* fraction at all, since both 1.0 and 2.0 are zero under the
* fraction mask.
*
* Note that the guard and round bits vanish from the number after
* rounding.
*/
if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */
/* -NG for g,r; -SNG_FRACBITS-exp for fraction */
(void) __fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
return (sign | SNG_EXP(1) | 0);
if ((fe->fe_cx & FSR_NX) ||
(fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
fe->fe_cx |= FSR_UF;
return (sign | SNG_EXP(0) | fp->fp_mant[3]);
}
/* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
(void) __fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
#ifdef DIAGNOSTIC
if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
__fpu_panic("fpu_ftos");
#endif
if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
exp++;
if (exp >= SNG_EXP_INFNAN) {
/* overflow to inf or to max single */
fe->fe_cx |= FSR_OF | FSR_NX;
if (toinf(fe, sign))
return (sign | SNG_EXP(SNG_EXP_INFNAN));
return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK);
}
done:
/* phew, made it */
return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
}
/*
* fpn -> double (32 bit high-order result returned; 32-bit low order result
* left in res[1]). Assumes <= 61 bits in double precision fraction.
*
* This code mimics fpu_ftos; see it for comments.
*/
u_int
__fpu_ftod(fe, fp, res)
struct fpemu *fe;
register struct fpn *fp;
u_int *res;
{
register u_int sign = fp->fp_sign << 31;
register int exp;
#define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31))
#define DBL_MASK (DBL_EXP(1) - 1)
if (ISNAN(fp)) {
(void) __fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS);
exp = DBL_EXP_INFNAN;
goto done;
}
if (ISINF(fp)) {
sign |= DBL_EXP(DBL_EXP_INFNAN);
goto zero;
}
if (ISZERO(fp)) {
zero: res[1] = 0;
return (sign);
}
if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {
(void) __fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
res[1] = 0;
return (sign | DBL_EXP(1) | 0);
}
if ((fe->fe_cx & FSR_NX) ||
(fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
fe->fe_cx |= FSR_UF;
exp = 0;
goto done;
}
(void) __fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
exp++;
if (exp >= DBL_EXP_INFNAN) {
fe->fe_cx |= FSR_OF | FSR_NX;
if (toinf(fe, sign)) {
res[1] = 0;
return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0);
}
res[1] = ~0;
return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK);
}
done:
res[1] = fp->fp_mant[3];
return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
}
/*
* fpn -> extended (32 bit high-order result returned; low-order fraction
* words left in res[1]..res[3]). Like ftod, which is like ftos ... but
* our internal format *is* extended precision, plus 2 bits for guard/round,
* so we can avoid a small bit of work.
*/
u_int
__fpu_ftoq(fe, fp, res)
struct fpemu *fe;
register struct fpn *fp;
u_int *res;
{
register u_int sign = fp->fp_sign << 31;
register int exp;
#define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31))
#define EXT_MASK (EXT_EXP(1) - 1)
if (ISNAN(fp)) {
(void) __fpu_shr(fp, 2); /* since we are not rounding */
exp = EXT_EXP_INFNAN;
goto done;
}
if (ISINF(fp)) {
sign |= EXT_EXP(EXT_EXP_INFNAN);
goto zero;
}
if (ISZERO(fp)) {
zero: res[1] = res[2] = res[3] = 0;
return (sign);
}
if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) {
(void) __fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp);
if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) {
res[1] = res[2] = res[3] = 0;
return (sign | EXT_EXP(1) | 0);
}
if ((fe->fe_cx & FSR_NX) ||
(fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
fe->fe_cx |= FSR_UF;
exp = 0;
goto done;
}
/* Since internal == extended, no need to shift here. */
if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2))
exp++;
if (exp >= EXT_EXP_INFNAN) {
fe->fe_cx |= FSR_OF | FSR_NX;
if (toinf(fe, sign)) {
res[1] = res[2] = res[3] = 0;
return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0);
}
res[1] = res[2] = res[3] = ~0;
return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK);
}
done:
res[1] = fp->fp_mant[1];
res[2] = fp->fp_mant[2];
res[3] = fp->fp_mant[3];
return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK));
}
/*
* Implode an fpn, writing the result into the given space.
*/
void
__fpu_implode(fe, fp, type, space)
struct fpemu *fe;
register struct fpn *fp;
int type;
register u_int *space;
{
switch (type) {
case FTYPE_LNG:
space[0] = __fpu_ftox(fe, fp, space);
break;
case FTYPE_INT:
space[0] = __fpu_ftoi(fe, fp);
break;
case FTYPE_SNG:
space[0] = __fpu_ftos(fe, fp);
break;
case FTYPE_DBL:
space[0] = __fpu_ftod(fe, fp, space);
break;
case FTYPE_EXT:
/* funky rounding precision options ?? */
space[0] = __fpu_ftoq(fe, fp, space);
break;
default:
__fpu_panic("fpu_implode");
}
DPRINTF(FPE_REG, ("fpu_implode: %x %x %x %x\n",
space[0], space[1], space[2], space[3]));
}

View File

@ -0,0 +1,229 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_mul.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_mul.c,v 1.2 1994/11/20 20:52:44 deraadt Exp
*
* $FreeBSD$
*/
/*
* Perform an FPU multiply (return x * y).
*/
#include <sys/types.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* The multiplication algorithm for normal numbers is as follows:
*
* The fraction of the product is built in the usual stepwise fashion.
* Each step consists of shifting the accumulator right one bit
* (maintaining any guard bits) and, if the next bit in y is set,
* adding the multiplicand (x) to the accumulator. Then, in any case,
* we advance one bit leftward in y. Algorithmically:
*
* A = 0;
* for (bit = 0; bit < FP_NMANT; bit++) {
* sticky |= A & 1, A >>= 1;
* if (Y & (1 << bit))
* A += X;
* }
*
* (X and Y here represent the mantissas of x and y respectively.)
* The resultant accumulator (A) is the product's mantissa. It may
* be as large as 11.11111... in binary and hence may need to be
* shifted right, but at most one bit.
*
* Since we do not have efficient multiword arithmetic, we code the
* accumulator as four separate words, just like any other mantissa.
* We use local `register' variables in the hope that this is faster
* than memory. We keep x->fp_mant in locals for the same reason.
*
* In the algorithm above, the bits in y are inspected one at a time.
* We will pick them up 32 at a time and then deal with those 32, one
* at a time. Note, however, that we know several things about y:
*
* - the guard and round bits at the bottom are sure to be zero;
*
* - often many low bits are zero (y is often from a single or double
* precision source);
*
* - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
*
* We can also test for 32-zero-bits swiftly. In this case, the center
* part of the loop---setting sticky, shifting A, and not adding---will
* run 32 times without adding X to A. We can do a 32-bit shift faster
* by simply moving words. Since zeros are common, we optimize this case.
* Furthermore, since A is initially zero, we can omit the shift as well
* until we reach a nonzero word.
*/
struct fpn *
__fpu_mul(fe)
register struct fpemu *fe;
{
register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
register u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
register int sticky;
FPU_DECL_CARRY
/*
* Put the `heavier' operand on the right (see fpu_emu.h).
* Then we will have one of the following cases, taken in the
* following order:
*
* - y = NaN. Implied: if only one is a signalling NaN, y is.
* The result is y.
* - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN
* case was taken care of earlier).
* If x = 0, the result is NaN. Otherwise the result
* is y, with its sign reversed if x is negative.
* - x = 0. Implied: y is 0 or number.
* The result is 0 (with XORed sign as usual).
* - other. Implied: both x and y are numbers.
* The result is x * y (XOR sign, multiply bits, add exponents).
*/
ORDER(x, y);
if (ISNAN(y)) {
y->fp_sign ^= x->fp_sign;
return (y);
}
if (ISINF(y)) {
if (ISZERO(x))
return (__fpu_newnan(fe));
y->fp_sign ^= x->fp_sign;
return (y);
}
if (ISZERO(x)) {
x->fp_sign ^= y->fp_sign;
return (x);
}
/*
* Setup. In the code below, the mask `m' will hold the current
* mantissa byte from y. The variable `bit' denotes the bit
* within m. We also define some macros to deal with everything.
*/
x3 = x->fp_mant[3];
x2 = x->fp_mant[2];
x1 = x->fp_mant[1];
x0 = x->fp_mant[0];
sticky = a3 = a2 = a1 = a0 = 0;
#define ADD /* A += X */ \
FPU_ADDS(a3, a3, x3); \
FPU_ADDCS(a2, a2, x2); \
FPU_ADDCS(a1, a1, x1); \
FPU_ADDC(a0, a0, x0)
#define SHR1 /* A >>= 1, with sticky */ \
sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1
#define SHR32 /* A >>= 32, with sticky */ \
sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0
#define STEP /* each 1-bit step of the multiplication */ \
SHR1; if (bit & m) { ADD; }; bit <<= 1
/*
* We are ready to begin. The multiply loop runs once for each
* of the four 32-bit words. Some words, however, are special.
* As noted above, the low order bits of Y are often zero. Even
* if not, the first loop can certainly skip the guard bits.
* The last word of y has its highest 1-bit in position FP_NMANT-1,
* so we stop the loop when we move past that bit.
*/
if ((m = y->fp_mant[3]) == 0) {
/* SHR32; */ /* unneeded since A==0 */
} else {
bit = 1 << FP_NG;
do {
STEP;
} while (bit != 0);
}
if ((m = y->fp_mant[2]) == 0) {
SHR32;
} else {
bit = 1;
do {
STEP;
} while (bit != 0);
}
if ((m = y->fp_mant[1]) == 0) {
SHR32;
} else {
bit = 1;
do {
STEP;
} while (bit != 0);
}
m = y->fp_mant[0]; /* definitely != 0 */
bit = 1;
do {
STEP;
} while (bit <= m);
/*
* Done with mantissa calculation. Get exponent and handle
* 11.111...1 case, then put result in place. We reuse x since
* it already has the right class (FP_NUM).
*/
m = x->fp_exp + y->fp_exp;
if (a0 >= FP_2) {
SHR1;
m++;
}
x->fp_sign ^= y->fp_sign;
x->fp_exp = m;
x->fp_sticky = sticky;
x->fp_mant[3] = a3;
x->fp_mant[2] = a2;
x->fp_mant[1] = a1;
x->fp_mant[0] = a0;
return (x);
}

View File

@ -0,0 +1,193 @@
/*-
* Copyright (c) 2002 by Thomas Moestl <tmm@FreeBSD.org>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Define arrays of leaf functions to load/store fp registers to memory. See
* fpu_reg.h for the definitions to use this from C code. The function sizes
* defines there must be kept in sync with this file!
*/
.macro ld32 reg
retl
ld [%o0], %f\reg
.endm
.macro st32 reg
retl
st %f\reg, [%o0]
.endm
.macro ld64 reg
retl
ldd [%o0], %f\reg
.endm
.macro st64 reg
retl
std %f\reg, [%o0]
.endm
/* The actual function arrays. */
.globl __fpu_ld32
__fpu_ld32:
ld32 0
ld32 1
ld32 2
ld32 3
ld32 4
ld32 5
ld32 6
ld32 7
ld32 8
ld32 9
ld32 10
ld32 11
ld32 12
ld32 13
ld32 14
ld32 15
ld32 16
ld32 17
ld32 18
ld32 19
ld32 20
ld32 21
ld32 22
ld32 23
ld32 24
ld32 25
ld32 26
ld32 27
ld32 28
ld32 29
ld32 30
ld32 31
.globl __fpu_st32
__fpu_st32:
st32 0
st32 1
st32 2
st32 3
st32 4
st32 5
st32 6
st32 7
st32 8
st32 9
st32 10
st32 11
st32 12
st32 13
st32 14
st32 15
st32 16
st32 17
st32 18
st32 19
st32 20
st32 21
st32 22
st32 23
st32 24
st32 25
st32 26
st32 27
st32 28
st32 29
st32 30
st32 31
.globl __fpu_ld64
__fpu_ld64:
ld64 0
ld64 2
ld64 4
ld64 6
ld64 8
ld64 10
ld64 12
ld64 14
ld64 16
ld64 18
ld64 20
ld64 22
ld64 24
ld64 26
ld64 28
ld64 30
ld64 32
ld64 34
ld64 36
ld64 38
ld64 40
ld64 42
ld64 44
ld64 46
ld64 48
ld64 50
ld64 52
ld64 54
ld64 56
ld64 58
ld64 60
ld64 62
.globl __fpu_st64
__fpu_st64:
st64 0
st64 2
st64 4
st64 6
st64 8
st64 10
st64 12
st64 14
st64 16
st64 18
st64 20
st64 22
st64 24
st64 26
st64 28
st64 30
st64 32
st64 34
st64 36
st64 38
st64 40
st64 42
st64 44
st64 46
st64 48
st64 50
st64 52
st64 54
st64 56
st64 58
st64 60
st64 62

View File

@ -0,0 +1,88 @@
/*-
* Copyright (c) 2002 by Thomas Moestl <tmm@FreeBSD.org>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _LIBC_SPARC64_FPU_FPU_REG_H_
#define _LIBC_SPARC64_FPU_FPU_REG_H_
/*
* These are not really of type char[]. They are are arrays of functions defined
* in fpu_reg.S; each array member loads/stores a certain fpu register of the
* given size.
*/
extern char __fpu_ld32[];
extern char __fpu_st32[];
extern char __fpu_ld64[];
extern char __fpu_st64[];
/* Size of the functions in the arrays. */
#define FPU_LD32_SZ 8
#define FPU_ST32_SZ 8
#define FPU_LD64_SZ 8
#define FPU_ST64_SZ 8
/* Typedefs for convenient casts in the functions below. */
typedef void (fp_ldst32_fn)(u_int32_t *);
typedef void (fp_ldst64_fn)(u_int64_t *);
/*
* These are the functions that are actually used in the fpu emulation code to
* access the fp registers. They are usually not used more than once, so
* cacheing needs not be done here.
*/
static __inline u_int32_t
__fpu_getreg(int r)
{
u_int32_t rv;
((fp_ldst32_fn *)&__fpu_st32[r * FPU_ST32_SZ])(&rv);
return (rv);
}
static __inline u_int64_t
__fpu_getreg64(int r)
{
u_int64_t rv;
((fp_ldst64_fn *)&__fpu_st64[(r >> 1) * FPU_ST64_SZ])(&rv);
return (rv);
}
static __inline void
__fpu_setreg(int r, u_int32_t v)
{
((fp_ldst32_fn *)&__fpu_ld32[r * FPU_LD32_SZ])(&v);
}
static __inline void
__fpu_setreg64(int r, u_int64_t v)
{
((fp_ldst64_fn *)&__fpu_ld64[(r >> 1) * FPU_LD64_SZ])(&v);
}
#endif /* _LIBC_SPARC64_FPU_FPU_REG_H_ */

View File

@ -0,0 +1,400 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_sqrt.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_sqrt.c,v 1.2 1994/11/20 20:52:46 deraadt Exp
*
* $FreeBSD$
*/
/*
* Perform an FPU square root (return sqrt(x)).
*/
#include <sys/types.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* Our task is to calculate the square root of a floating point number x0.
* This number x normally has the form:
*
* exp
* x = mant * 2 (where 1 <= mant < 2 and exp is an integer)
*
* This can be left as it stands, or the mantissa can be doubled and the
* exponent decremented:
*
* exp-1
* x = (2 * mant) * 2 (where 2 <= 2 * mant < 4)
*
* If the exponent `exp' is even, the square root of the number is best
* handled using the first form, and is by definition equal to:
*
* exp/2
* sqrt(x) = sqrt(mant) * 2
*
* If exp is odd, on the other hand, it is convenient to use the second
* form, giving:
*
* (exp-1)/2
* sqrt(x) = sqrt(2 * mant) * 2
*
* In the first case, we have
*
* 1 <= mant < 2
*
* and therefore
*
* sqrt(1) <= sqrt(mant) < sqrt(2)
*
* while in the second case we have
*
* 2 <= 2*mant < 4
*
* and therefore
*
* sqrt(2) <= sqrt(2*mant) < sqrt(4)
*
* so that in any case, we are sure that
*
* sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2
*
* or
*
* 1 <= sqrt(n * mant) < 2, n = 1 or 2.
*
* This root is therefore a properly formed mantissa for a floating
* point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2
* as above. This leaves us with the problem of finding the square root
* of a fixed-point number in the range [1..4).
*
* Though it may not be instantly obvious, the following square root
* algorithm works for any integer x of an even number of bits, provided
* that no overflows occur:
*
* let q = 0
* for k = NBITS-1 to 0 step -1 do -- for each digit in the answer...
* x *= 2 -- multiply by radix, for next digit
* if x >= 2q + 2^k then -- if adding 2^k does not
* x -= 2q + 2^k -- exceed the correct root,
* q += 2^k -- add 2^k and adjust x
* fi
* done
* sqrt = q / 2^(NBITS/2) -- (and any remainder is in x)
*
* If NBITS is odd (so that k is initially even), we can just add another
* zero bit at the top of x. Doing so means that q is not going to acquire
* a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the
* final value in x is not needed, or can be off by a factor of 2, this is
* equivalant to moving the `x *= 2' step to the bottom of the loop:
*
* for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done
*
* and the result q will then be sqrt(x0) * 2^floor(NBITS / 2).
* (Since the algorithm is destructive on x, we will call x's initial
* value, for which q is some power of two times its square root, x0.)
*
* If we insert a loop invariant y = 2q, we can then rewrite this using
* C notation as:
*
* q = y = 0; x = x0;
* for (k = NBITS; --k >= 0;) {
* #if (NBITS is even)
* x *= 2;
* #endif
* t = y + (1 << k);
* if (x >= t) {
* x -= t;
* q += 1 << k;
* y += 1 << (k + 1);
* }
* #if (NBITS is odd)
* x *= 2;
* #endif
* }
*
* If x0 is fixed point, rather than an integer, we can simply alter the
* scale factor between q and sqrt(x0). As it happens, we can easily arrange
* for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
*
* In our case, however, x0 (and therefore x, y, q, and t) are multiword
* integers, which adds some complication. But note that q is built one
* bit at a time, from the top down, and is not used itself in the loop
* (we use 2q as held in y instead). This means we can build our answer
* in an integer, one word at a time, which saves a bit of work. Also,
* since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
* `new' bits in y and we can set them with an `or' operation rather than
* a full-blown multiword add.
*
* We are almost done, except for one snag. We must prove that none of our
* intermediate calculations can overflow. We know that x0 is in [1..4)
* and therefore the square root in q will be in [1..2), but what about x,
* y, and t?
*
* We know that y = 2q at the beginning of each loop. (The relation only
* fails temporarily while y and q are being updated.) Since q < 2, y < 4.
* The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and.
* Furthermore, we can prove with a bit of work that x never exceeds y by
* more than 2, so that even after doubling, 0 <= x < 8. (This is left as
* an exercise to the reader, mostly because I have become tired of working
* on this comment.)
*
* If our floating point mantissas (which are of the form 1.frac) occupy
* B+1 bits, our largest intermediary needs at most B+3 bits, or two extra.
* In fact, we want even one more bit (for a carry, to avoid compares), or
* three extra. There is a comment in fpu_emu.h reminding maintainers of
* this, so we have some justification in assuming it.
*/
struct fpn *
__fpu_sqrt(fe)
struct fpemu *fe;
{
register struct fpn *x = &fe->fe_f1;
register u_int bit, q, tt;
register u_int x0, x1, x2, x3;
register u_int y0, y1, y2, y3;
register u_int d0, d1, d2, d3;
register int e;
/*
* Take care of special cases first. In order:
*
* sqrt(NaN) = NaN
* sqrt(+0) = +0
* sqrt(-0) = -0
* sqrt(x < 0) = NaN (including sqrt(-Inf))
* sqrt(+Inf) = +Inf
*
* Then all that remains are numbers with mantissas in [1..2).
*/
if (ISNAN(x) || ISZERO(x))
return (x);
if (x->fp_sign)
return (__fpu_newnan(fe));
if (ISINF(x))
return (x);
/*
* Calculate result exponent. As noted above, this may involve
* doubling the mantissa. We will also need to double x each
* time around the loop, so we define a macro for this here, and
* we break out the multiword mantissa.
*/
#ifdef FPU_SHL1_BY_ADD
#define DOUBLE_X { \
FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \
FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \
}
#else
#define DOUBLE_X { \
x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \
x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \
}
#endif
#if (FP_NMANT & 1) != 0
# define ODD_DOUBLE DOUBLE_X
# define EVEN_DOUBLE /* nothing */
#else
# define ODD_DOUBLE /* nothing */
# define EVEN_DOUBLE DOUBLE_X
#endif
x0 = x->fp_mant[0];
x1 = x->fp_mant[1];
x2 = x->fp_mant[2];
x3 = x->fp_mant[3];
e = x->fp_exp;
if (e & 1) /* exponent is odd; use sqrt(2mant) */
DOUBLE_X;
/* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */
x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */
/*
* Now calculate the mantissa root. Since x is now in [1..4),
* we know that the first trip around the loop will definitely
* set the top bit in q, so we can do that manually and start
* the loop at the next bit down instead. We must be sure to
* double x correctly while doing the `known q=1.0'.
*
* We do this one mantissa-word at a time, as noted above, to
* save work. To avoid `(1 << 31) << 1', we also do the top bit
* outside of each per-word loop.
*
* The calculation `t = y + bit' breaks down into `t0 = y0, ...,
* t3 = y3, t? |= bit' for the appropriate word. Since the bit
* is always a `new' one, this means that three of the `t?'s are
* just the corresponding `y?'; we use `#define's here for this.
* The variable `tt' holds the actual `t?' variable.
*/
/* calculate q0 */
#define t0 tt
bit = FP_1;
EVEN_DOUBLE;
/* if (x >= (t0 = y0 | bit)) { */ /* always true */
q = bit;
x0 -= bit;
y0 = bit << 1;
/* } */
ODD_DOUBLE;
while ((bit >>= 1) != 0) { /* for remaining bits in q0 */
EVEN_DOUBLE;
t0 = y0 | bit; /* t = y + bit */
if (x0 >= t0) { /* if x >= t then */
x0 -= t0; /* x -= t */
q |= bit; /* q += bit */
y0 |= bit << 1; /* y += bit << 1 */
}
ODD_DOUBLE;
}
x->fp_mant[0] = q;
#undef t0
/* calculate q1. note (y0&1)==0. */
#define t0 y0
#define t1 tt
q = 0;
y1 = 0;
bit = 1 << 31;
EVEN_DOUBLE;
t1 = bit;
FPU_SUBS(d1, x1, t1);
FPU_SUBC(d0, x0, t0); /* d = x - t */
if ((int)d0 >= 0) { /* if d >= 0 (i.e., x >= t) then */
x0 = d0, x1 = d1; /* x -= t */
q = bit; /* q += bit */
y0 |= 1; /* y += bit << 1 */
}
ODD_DOUBLE;
while ((bit >>= 1) != 0) { /* for remaining bits in q1 */
EVEN_DOUBLE; /* as before */
t1 = y1 | bit;
FPU_SUBS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
if ((int)d0 >= 0) {
x0 = d0, x1 = d1;
q |= bit;
y1 |= bit << 1;
}
ODD_DOUBLE;
}
x->fp_mant[1] = q;
#undef t1
/* calculate q2. note (y1&1)==0; y0 (aka t0) is fixed. */
#define t1 y1
#define t2 tt
q = 0;
y2 = 0;
bit = 1 << 31;
EVEN_DOUBLE;
t2 = bit;
FPU_SUBS(d2, x2, t2);
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
if ((int)d0 >= 0) {
x0 = d0, x1 = d1, x2 = d2;
q |= bit;
y1 |= 1; /* now t1, y1 are set in concrete */
}
ODD_DOUBLE;
while ((bit >>= 1) != 0) {
EVEN_DOUBLE;
t2 = y2 | bit;
FPU_SUBS(d2, x2, t2);
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
if ((int)d0 >= 0) {
x0 = d0, x1 = d1, x2 = d2;
q |= bit;
y2 |= bit << 1;
}
ODD_DOUBLE;
}
x->fp_mant[2] = q;
#undef t2
/* calculate q3. y0, t0, y1, t1 all fixed; y2, t2, almost done. */
#define t2 y2
#define t3 tt
q = 0;
y3 = 0;
bit = 1 << 31;
EVEN_DOUBLE;
t3 = bit;
FPU_SUBS(d3, x3, t3);
FPU_SUBCS(d2, x2, t2);
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
ODD_DOUBLE;
if ((int)d0 >= 0) {
x0 = d0, x1 = d1, x2 = d2;
q |= bit;
y2 |= 1;
}
while ((bit >>= 1) != 0) {
EVEN_DOUBLE;
t3 = y3 | bit;
FPU_SUBS(d3, x3, t3);
FPU_SUBCS(d2, x2, t2);
FPU_SUBCS(d1, x1, t1);
FPU_SUBC(d0, x0, t0);
if ((int)d0 >= 0) {
x0 = d0, x1 = d1, x2 = d2;
q |= bit;
y3 |= bit << 1;
}
ODD_DOUBLE;
}
x->fp_mant[3] = q;
/*
* The result, which includes guard and round bits, is exact iff
* x is now zero; any nonzero bits in x represent sticky bits.
*/
x->fp_sticky = x0 | x1 | x2 | x3;
return (x);
}

View File

@ -0,0 +1,223 @@
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu_subr.c 8.1 (Berkeley) 6/11/93
* from: NetBSD: fpu_subr.c,v 1.3 1996/03/14 19:42:01 christos Exp
*
* $FreeBSD$
*/
/*
* FPU subroutines.
*/
#include <sys/param.h>
#include <machine/frame.h>
#include <machine/fp.h>
#include <machine/fsr.h>
#include <machine/instr.h>
#include "fpu_arith.h"
#include "fpu_emu.h"
#include "fpu_extern.h"
/*
* Shift the given number right rsh bits. Any bits that `fall off' will get
* shoved into the sticky field; we return the resulting sticky. Note that
* shifting NaNs is legal (this will never shift all bits out); a NaN's
* sticky field is ignored anyway.
*/
int
__fpu_shr(register struct fpn *fp, register int rsh)
{
register u_int m0, m1, m2, m3, s;
register int lsh;
#ifdef DIAGNOSTIC
if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp)))
__fpu_panic("fpu_rightshift 1");
#endif
m0 = fp->fp_mant[0];
m1 = fp->fp_mant[1];
m2 = fp->fp_mant[2];
m3 = fp->fp_mant[3];
/* If shifting all the bits out, take a shortcut. */
if (rsh >= FP_NMANT) {
#ifdef DIAGNOSTIC
if ((m0 | m1 | m2 | m3) == 0)
__fpu_panic("fpu_rightshift 2");
#endif
fp->fp_mant[0] = 0;
fp->fp_mant[1] = 0;
fp->fp_mant[2] = 0;
fp->fp_mant[3] = 0;
#ifdef notdef
if ((m0 | m1 | m2 | m3) == 0)
fp->fp_class = FPC_ZERO;
else
#endif
fp->fp_sticky = 1;
return (1);
}
/* Squish out full words. */
s = fp->fp_sticky;
if (rsh >= 32 * 3) {
s |= m3 | m2 | m1;
m3 = m0, m2 = 0, m1 = 0, m0 = 0;
} else if (rsh >= 32 * 2) {
s |= m3 | m2;
m3 = m1, m2 = m0, m1 = 0, m0 = 0;
} else if (rsh >= 32) {
s |= m3;
m3 = m2, m2 = m1, m1 = m0, m0 = 0;
}
/* Handle any remaining partial word. */
if ((rsh &= 31) != 0) {
lsh = 32 - rsh;
s |= m3 << lsh;
m3 = (m3 >> rsh) | (m2 << lsh);
m2 = (m2 >> rsh) | (m1 << lsh);
m1 = (m1 >> rsh) | (m0 << lsh);
m0 >>= rsh;
}
fp->fp_mant[0] = m0;
fp->fp_mant[1] = m1;
fp->fp_mant[2] = m2;
fp->fp_mant[3] = m3;
fp->fp_sticky = s;
return (s);
}
/*
* Force a number to be normal, i.e., make its fraction have all zero
* bits before FP_1, then FP_1, then all 1 bits. This is used for denorms
* and (sometimes) for intermediate results.
*
* Internally, this may use a `supernormal' -- a number whose fp_mant
* is greater than or equal to 2.0 -- so as a side effect you can hand it
* a supernormal and it will fix it (provided fp->fp_mant[3] == 0).
*/
void
__fpu_norm(register struct fpn *fp)
{
register u_int m0, m1, m2, m3, top, sup, nrm;
register int lsh, rsh, exp;
exp = fp->fp_exp;
m0 = fp->fp_mant[0];
m1 = fp->fp_mant[1];
m2 = fp->fp_mant[2];
m3 = fp->fp_mant[3];
/* Handle severe subnormals with 32-bit moves. */
if (m0 == 0) {
if (m1)
m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32;
else if (m2)
m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32;
else if (m3)
m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32;
else {
fp->fp_class = FPC_ZERO;
return;
}
}
/* Now fix any supernormal or remaining subnormal. */
nrm = FP_1;
sup = nrm << 1;
if (m0 >= sup) {
/*
* We have a supernormal number. We need to shift it right.
* We may assume m3==0.
*/
for (rsh = 1, top = m0 >> 1; top >= sup; rsh++) /* XXX slow */
top >>= 1;
exp += rsh;
lsh = 32 - rsh;
m3 = m2 << lsh;
m2 = (m2 >> rsh) | (m1 << lsh);
m1 = (m1 >> rsh) | (m0 << lsh);
m0 = top;
} else if (m0 < nrm) {
/*
* We have a regular denorm (a subnormal number), and need
* to shift it left.
*/
for (lsh = 1, top = m0 << 1; top < nrm; lsh++) /* XXX slow */
top <<= 1;
exp -= lsh;
rsh = 32 - lsh;
m0 = top | (m1 >> rsh);
m1 = (m1 << lsh) | (m2 >> rsh);
m2 = (m2 << lsh) | (m3 >> rsh);
m3 <<= lsh;
}
fp->fp_exp = exp;
fp->fp_mant[0] = m0;
fp->fp_mant[1] = m1;
fp->fp_mant[2] = m2;
fp->fp_mant[3] = m3;
}
/*
* Concoct a `fresh' Quiet NaN per Appendix N.
* As a side effect, we set NV (invalid) for the current exceptions.
*/
struct fpn *
__fpu_newnan(register struct fpemu *fe)
{
register struct fpn *fp;
fe->fe_cx = FSR_NV;
fp = &fe->fe_f3;
fp->fp_class = FPC_QNAN;
fp->fp_sign = 0;
fp->fp_mant[0] = FP_1 - 1;
fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0;
return (fp);
}

View File

@ -81,6 +81,9 @@ __sparc_utrap(struct utrapframe *uf)
switch (uf->uf_type) {
case UT_FP_EXCEPTION_IEEE_754:
case UT_FP_EXCEPTION_OTHER:
__fpu_exception(uf);
UF_DONE(uf);
return;
case UT_ILLEGAL_INSTRUCTION:
case UT_MEM_ADDRESS_NOT_ALIGNED:
break;

View File

@ -38,9 +38,9 @@ __FBSDID("$FreeBSD$");
static const struct sparc_utrap_args ua[] = {
{ UT_FP_DISABLED, __sparc_utrap_fp_disabled, NULL, NULL, NULL },
#if 0
{ UT_FP_EXCEPTION_IEEE_754, __sparc_utrap_gen, NULL, NULL, NULL },
{ UT_FP_EXCEPTION_OTHER, __sparc_utrap_gen, NULL, NULL, NULL },
#if 0
{ UT_ILLEGAL_INSTRUCTION, __sparc_utrap_gen, NULL, NULL, NULL },
{ UT_MEM_ADDRESS_NOT_ALIGNED, __sparc_utrap_gen, NULL, NULL, NULL },
#endif