diff --git a/sys/alpha/alpha/fp_emulate.c b/sys/alpha/alpha/fp_emulate.c new file mode 100644 index 000000000000..32ec7eeb310e --- /dev/null +++ b/sys/alpha/alpha/fp_emulate.c @@ -0,0 +1,393 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GETREG(regs, i) (*(fp_register_t*) ®s->fpr_regs[i]) +#define PUTREG(regs, i, v) (*(fp_register_t*) ®s->fpr_regs[i] = v) + +typedef fp_register_t fp_opcode_handler(union alpha_instruction ins, + int src, int rnd, + u_int64_t fp_control, + u_int64_t *status, + struct fpreg *fpregs); + +static fp_register_t fp_add(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_add(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + src, rnd, control, status); +} + +static fp_register_t fp_sub(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_sub(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + src, rnd, control, status); +} + +static fp_register_t fp_mul(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_mul(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + src, rnd, control, status); +} + +static fp_register_t fp_div(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_div(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + src, rnd, control, status); +} + +static fp_register_t fp_cmpun(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_cmpun(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + status); +} + +static fp_register_t fp_cmpeq(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return 
ieee_cmpeq(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + status); +} + +static fp_register_t fp_cmplt(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_cmplt(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + status); +} + +static fp_register_t fp_cmple(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + return ieee_cmple(GETREG(fpregs, ins.f_format.fa), + GETREG(fpregs, ins.f_format.fb), + status); +} + +static fp_register_t fp_cvts(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + switch (src) { + case T_FORMAT: + return ieee_convert_T_S(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + + case Q_FORMAT: + return ieee_convert_Q_S(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + + default: + *status |= FPCR_INV; + return GETREG(fpregs, ins.f_format.fc); + } +} + +static fp_register_t fp_cvtt(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + switch (src) { + case S_FORMAT: + return ieee_convert_S_T(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + break; + + case Q_FORMAT: + return ieee_convert_Q_T(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + break; + + default: + *status |= FPCR_INV; + return GETREG(fpregs, ins.f_format.fc); + } +} + +static fp_register_t fp_cvtq(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + switch (src) { + case S_FORMAT: + return ieee_convert_S_Q(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + break; + + case T_FORMAT: + return ieee_convert_T_Q(GETREG(fpregs, ins.f_format.fb), + rnd, control, status); + break; + + default: + *status |= FPCR_INV; + return GETREG(fpregs, ins.f_format.fc); + } +} + 
+static fp_register_t fp_reserved(union alpha_instruction ins, + int src, int rnd, + u_int64_t control, u_int64_t *status, + struct fpreg *fpregs) +{ + *status |= FPCR_INV; + return GETREG(fpregs, ins.f_format.fc); +} + +static int fp_emulate(union alpha_instruction ins, struct proc *p) +{ + u_int64_t control = p->p_addr->u_pcb.pcb_fp_control; + struct fpreg *fpregs = &p->p_addr->u_pcb.pcb_fp; + static fp_opcode_handler *ops[16] = { + fp_add, /* 0 */ + fp_sub, /* 1 */ + fp_mul, /* 2 */ + fp_div, /* 3 */ + fp_cmpun, /* 4 */ + fp_cmpeq, /* 5 */ + fp_cmplt, /* 6 */ + fp_cmple, /* 7 */ + fp_reserved, /* 8 */ + fp_reserved, /* 9 */ + fp_reserved, /* 10 */ + fp_reserved, /* 11 */ + fp_cvts, /* 12 */ + fp_reserved, /* 13 */ + fp_cvtt, /* 14 */ + fp_cvtq, /* 15 */ + }; + int src, rnd; + fp_register_t result; + u_int64_t status; + + /* + * Only attempt to emulate ieee instructions. + */ + if (ins.common.opcode != op_flti) + return 0; + + /* + * Dump the float registers into the pcb so we can get at + * them. + */ + if (p == fpcurproc) { + alpha_pal_wrfen(1); + savefpstate(&fpcurproc->p_addr->u_pcb.pcb_fp); + alpha_pal_wrfen(0); + fpcurproc = NULL; + } + + /* + * Decode and execute the instruction. + */ + src = (ins.f_format.function >> 4) & 3; + rnd = (ins.f_format.function >> 6) & 3; + if (rnd == 3) + rnd = (fpregs->fpr_cr >> FPCR_DYN_SHIFT) & 3; + status = 0; + result = ops[ins.f_format.function & 0xf](ins, src, rnd, + control, &status, + fpregs); + + /* + * Handle exceptions. + */ + if (status) { + u_int64_t fpcr; + + /* Record the exception in the software control word. 
*/ + control |= (status >> IEEE_STATUS_TO_FPCR_SHIFT); + p->p_addr->u_pcb.pcb_fp_control = control; + + /* Regenerate the control register */ + fpcr = fpregs->fpr_cr & FPCR_DYN_MASK; + fpcr |= ((control & IEEE_STATUS_MASK) + << IEEE_STATUS_TO_FPCR_SHIFT); + if (!(control & IEEE_TRAP_ENABLE_INV)) + fpcr |= FPCR_INVD; + if (!(control & IEEE_TRAP_ENABLE_DZE)) + fpcr |= FPCR_DZED; + if (!(control & IEEE_TRAP_ENABLE_OVF)) + fpcr |= FPCR_OVFD; + if (!(control & IEEE_TRAP_ENABLE_UNF)) + fpcr |= FPCR_UNFD; + if (!(control & IEEE_TRAP_ENABLE_INE)) + fpcr |= FPCR_INED; + if (control & IEEE_STATUS_MASK) + fpcr |= FPCR_SUM; + fpregs->fpr_cr = fpcr; + + /* Check the trap enable */ + if ((control >> IEEE_STATUS_TO_EXCSUM_SHIFT) + & (control & IEEE_TRAP_ENABLE_MASK)) + return 0; + } + + PUTREG(fpregs, ins.f_format.fc, result); + return 1; +} + +/* + * Attempt to complete a floating point instruction which trapped by + * emulating it in software. Return non-zero if the completion was + * successful, otherwise zero. + */ +int fp_software_completion(u_int64_t regmask, struct proc *p) +{ + struct trapframe *frame = p->p_md.md_tf; + u_int64_t pc = frame->tf_regs[FRAME_PC]; + int error; + + /* + * First we must search back through the trap shadow to find which + * instruction was responsible for generating the trap. + */ + pc -= 4; + while (regmask) { + union alpha_instruction ins; + + /* + * Read the instruction and figure out the destination + * register and opcode. + */ + error = copyin((caddr_t) pc, &ins, sizeof(ins)); + if (error) + return 0; + + switch (ins.common.opcode) { + case op_call_pal: + case op_jsr: + case op_br ... op_bgt: + /* + * Condition 6: the trap shadow may not + * include any branch instructions. Also, + * the trap shadow is bounded by EXCB, TRAPB + * and CALL_PAL. 
+ */ + return 0; + + case op_misc: + switch (ins.memory_format.function) { + case misc_trapb: + case misc_excb: + return 0; + } + break; + + case op_inta: + case op_intl: + case op_ints: + /* + * The first 32 bits of the register mask + * represents integer registers which were + * modified in the trap shadow. + */ + regmask &= ~(1LL << ins.o_format.rc); + break; + + case op_fltv: + case op_flti: + case op_fltl: + /* + * The second 32 bits of the register mask + * represents float registers which were + * modified in the trap shadow. + */ + regmask &= ~(1LL << (ins.f_format.fc + 32)); + break; + } + + if (regmask == 0) { + /* + * We have traced back through all the + * instructions in the trap shadow, so this + * must be the one which generated the trap. + */ + if (fp_emulate(ins, p)) { + /* + * Restore pc to the first instruction + * in the trap shadow. + */ + frame->tf_regs[FRAME_PC] = pc + 4; + return 1; + } else + return 0; + } + pc -= 4; + } + return 0; +} diff --git a/sys/alpha/alpha/ieee_float.c b/sys/alpha/alpha/ieee_float.c new file mode 100644 index 000000000000..9e9846cb9153 --- /dev/null +++ b/sys/alpha/alpha/ieee_float.c @@ -0,0 +1,1515 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +/* + * An implementation of IEEE 754 floating point arithmetic supporting + * multiply, divide, addition, subtraction and conversion to and from + * integer. Probably not the fastest floating point code in the world + * but it should be pretty accurate. + * + * A special thanks to John Polstra for pointing out some problems + * with an earlier version of this code and for educating me as to the + * correct use of sticky bits. + */ + +#include +#ifdef TEST +#include "../include/fpu.h" +#include "ieee_float.h" +#else +#include +#include +#include +#include +#include +#include +#include +#endif + +/* + * The number of fraction bits in a T format float. + */ +#define T_FRACBITS 52 + +/* + * The number of fraction bits in a S format float. + */ +#define S_FRACBITS 23 + +/* + * Mask the fraction part of a float to contain only those bits which + * should be in single precision number. + */ +#define S_FRACMASK ((1ULL << 52) - (1ULL << 29)) + +/* + * The number of extra zero bits we shift into the fraction part + * to gain accuracy. Two guard bits and one sticky bit are required + * to ensure accurate rounding. 
+ */ +#define FRAC_SHIFT 3 + +/* + * Values for 1.0 and 2.0 fractions (including the extra FRAC_SHIFT + * bits). + */ +#define ONE (1ULL << (T_FRACBITS + FRAC_SHIFT)) +#define TWO (ONE + ONE) + +/* + * The maximum and minimum values for S and T format exponents. + */ +#define T_MAXEXP 0x3ff +#define T_MINEXP -0x3fe +#define S_MAXEXP 0x7f +#define S_MINEXP -0x7e + +/* + * Exponent values in registers are biased by adding this value. + */ +#define BIAS_EXP 0x3ff + +/* + * Exponent value for INF and NaN. + */ +#define NAN_EXP 0x7ff + +/* + * If this bit is set in the fraction part of a NaN, then the number + * is a quiet NaN, i.e. no traps are generated. + */ +#define QNAN_BIT (1ULL << 51) + +/* + * Return true if the number is any kind of NaN. + */ +static __inline int +isNaN(fp_register_t f) +{ + return f.t.exponent == NAN_EXP && f.t.fraction != 0; +} + +/* + * Return true if the number is a quiet NaN. + */ +static __inline int +isQNaN(fp_register_t f) +{ + return f.t.exponent == NAN_EXP && (f.t.fraction & QNAN_BIT); +} + +/* + * Return true if the number is a signalling NaN. + */ +static __inline int +isSNaN(fp_register_t f) +{ + return isNaN(f) && !isQNaN(f); +} + +/* + * Return true if the number is +/- INF. + */ +static __inline int +isINF(fp_register_t f) +{ + return f.t.exponent == NAN_EXP && f.t.fraction == 0; +} + +/* + * Return true if the number is +/- 0. + */ +static __inline int +isZERO(fp_register_t f) +{ + return f.t.exponent == 0 && f.t.fraction == 0; +} + +/* + * Return true if the number is denormalised. + */ +static __inline int +isDENORM(fp_register_t f) +{ + return f.t.exponent == 0 && f.t.fraction != 0; +} + +/* + * Extract the exponent part of a float register. If the exponent is + * zero, the number may be denormalised (if the fraction is nonzero). + * If so, return the minimum exponent for the source datatype. 
+ */ +static __inline int +getexp(fp_register_t f, int src) +{ + int minexp[] = { S_MINEXP, 0, T_MINEXP, 0 }; + if (f.t.exponent == 0) + if (f.t.fraction) + return minexp[src]; + else + return 0; + return f.t.exponent - BIAS_EXP; +} + +/* + * Extract the fraction part of a float register, shift it up a bit + * to give extra accuracy and add in the implicit 1 bit. Must be + * careful to handle denormalised numbers and zero correctly. + */ +static __inline u_int64_t +getfrac(fp_register_t f) +{ + if (f.t.exponent == 0) + return f.t.fraction << FRAC_SHIFT; + else + return (f.t.fraction << FRAC_SHIFT) | ONE; +} + +/* + * Make a float (in register format) from a sign, exponent and + * fraction, normalising and rounding as necessary. + * Return the float and set *status if any traps are generated. + */ +static fp_register_t +makefloat(int sign, int exp, u_int64_t frac, + int src, int rnd, + u_int64_t control, u_int64_t *status) +{ + fp_register_t f; + int minexp = 0, maxexp = 0, alpha = 0; + u_int64_t epsilon = 0, max = 0; + + if (frac == 0) { + f.t.sign = sign; + f.t.exponent = 0; + f.t.fraction = 0; + return f; + } + + if (frac >= TWO) { + /* + * Fraction is >= 2.0. + * Shift the fraction down, preserving the 'sticky' + * bit. + */ + while (frac >= TWO) { + frac = (frac >> 1) | (frac & 1); + exp++; + } + } else if (frac < ONE) { + /* + * Fraction is < 1.0. Shift it up. + */ + while (frac < ONE) { + frac = (frac << 1) | (frac & 1); + exp--; + } + } + + switch (src) { + case S_FORMAT: + minexp = S_MINEXP; + maxexp = S_MAXEXP; + alpha = 0xc0; + epsilon = (1ULL << (T_FRACBITS - S_FRACBITS + FRAC_SHIFT)); + max = TWO - epsilon; + break; + + case T_FORMAT: + minexp = T_MINEXP; + maxexp = T_MAXEXP; + alpha = 0x600; + epsilon = (1ULL << FRAC_SHIFT); + max = TWO - epsilon; + break; + } + + /* + * Handle underflow before rounding so that denormalised + * numbers are rounded correctly. 
+ */ + if (exp < minexp) { + *status |= FPCR_INE; + if (control & IEEE_TRAP_ENABLE_UNF) { + *status |= FPCR_UNF; + exp += alpha; + } else { + /* denormalise */ + while (exp < minexp) { + exp++; + frac = (frac >> 1) | (frac & 1); + } + exp = minexp - 1; + } + } + + /* + * Round the fraction according to the rounding mode. + */ + if (frac & (epsilon - 1)) { + u_int64_t fraclo, frachi; + u_int64_t difflo, diffhi; + + fraclo = frac & max; + frachi = fraclo + epsilon; + switch (rnd) { + case ROUND_CHOP: + frac = fraclo; + break; + case ROUND_MINUS_INF: + if (f.t.sign) + frac = frachi; + else + frac = fraclo; + break; + case ROUND_NORMAL: + difflo = frac - fraclo; + diffhi = frachi - frac; + if (difflo < diffhi) + frac = fraclo; + else if (diffhi < difflo) + frac = frachi; + else + /* round to even */ + if (fraclo & epsilon) + frac = frachi; + else + frac = fraclo; + break; + case ROUND_PLUS_INF: + if (f.t.sign) + frac = fraclo; + else + frac = frachi; + break; + } + + /* + * Rounding up may take us to TWO if + * fraclo == (TWO - epsilon). Also If fraclo has been + * denormalised to (ONE - epsilon) then there is a + * possibility that we will round to ONE exactly. + */ + if (frac >= TWO) { + frac = (frac >> 1) & ~(epsilon - 1); + exp++; + } else if (exp == minexp - 1 && frac == ONE) { + /* Renormalise to ONE * 2^minexp */ + exp = minexp; + } + + *status |= FPCR_INE; + } + + /* + * Check for overflow and round to the correct INF as needed. 
+ */ + if (exp > maxexp) { + *status |= FPCR_OVF | FPCR_INE; + if (control & IEEE_TRAP_ENABLE_OVF) { + exp -= alpha; + } else { + switch (rnd) { + case ROUND_CHOP: + exp = maxexp; + frac = max; + break; + case ROUND_MINUS_INF: + if (sign) { + exp = maxexp + 1; /* INF */ + frac = 0; + } else { + exp = maxexp; + frac = max; + } + break; + case ROUND_NORMAL: + exp = maxexp + 1; /* INF */ + frac = 0; + break; + case ROUND_PLUS_INF: + if (sign) { + exp = maxexp; + frac = max; + } else { + exp = maxexp + 1; /* INF */ + frac = 0; + } + break; + } + } + } + + f.t.sign = sign; + if (exp > maxexp) /* NaN, INF */ + f.t.exponent = NAN_EXP; + else if (exp < minexp) /* denorm, zero */ + f.t.exponent = 0; + else + f.t.exponent = exp + BIAS_EXP; + f.t.fraction = (frac & ~ONE) >> FRAC_SHIFT; + return f; +} + +/* + * Return the canonical quiet NaN in register format. + */ +static fp_register_t +makeQNaN(void) +{ + fp_register_t f; + f.t.sign = 0; + f.t.exponent = NAN_EXP; + f.t.fraction = QNAN_BIT; + return f; +} + +/* + * Return +/- INF. + */ +static fp_register_t +makeINF(int sign) +{ + fp_register_t f; + f.t.sign = sign; + f.t.exponent = NAN_EXP; + f.t.fraction = 0; + return f; +} + +/* + * Return +/- 0. + */ +static fp_register_t +makeZERO(int sign) +{ + fp_register_t f; + f.t.sign = sign; + f.t.exponent = 0; + f.t.fraction = 0; + return f; +} + +fp_register_t +ieee_add(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status) +{ + int shift; + int expa, expb, exp; + u_int64_t fraca, fracb, frac; + int sign, sticky; + + /* First handle NaNs */ + if (isNaN(fa) || isNaN(fb)) { + fp_register_t result; + + /* Instructions Descriptions (I) section 4.7.10.4 */ + if (isQNaN(fb)) + result = fb; + else if (isSNaN(fb)) { + result = fb; + result.t.fraction |= QNAN_BIT; + } else if (isQNaN(fa)) + result = fa; + else if (isSNaN(fa)) + result = fa; + + /* If either operand is a signalling NaN, trap. 
*/ + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + + return result; + } + + /* Handle +/- INF */ + if (isINF(fa)) + if (isINF(fb)) + if (fa.t.sign != fb.t.sign) { + /* If adding -INF to +INF, generate a trap. */ + *status |= FPCR_INV; + return makeQNaN(); + } else + return fa; + else + return fa; + else if (isINF(fb)) + return fb; + + /* + * Unpack the registers. + */ + expa = getexp(fa, src); + expb = getexp(fb, src); + fraca = getfrac(fa); + fracb = getfrac(fb); + shift = expa - expb; + if (shift < 0) { + shift = -shift; + exp = expb; + sticky = (fraca & ((1ULL << shift) - 1)) != 0; + if (shift >= 64) + fraca = sticky; + else + fraca = (fraca >> shift) | sticky; + } else if (shift > 0) { + exp = expa; + sticky = (fracb & ((1ULL << shift) - 1)) != 0; + if (shift >= 64) + fracb = sticky; + else + fracb = (fracb >> shift) | sticky; + } else + exp = expa; + if (fa.t.sign) fraca = -fraca; + if (fb.t.sign) fracb = -fracb; + frac = fraca + fracb; + if (frac >> 63) { + sign = 1; + frac = -frac; + } else + sign = 0; + + /* -0 + -0 = -0 */ + if (fa.t.exponent == 0 && fa.t.fraction == 0 + && fb.t.exponent == 0 && fb.t.fraction == 0) + sign = fa.t.sign && fb.t.sign; + + return makefloat(sign, exp, frac, src, rnd, control, status); +} + +fp_register_t +ieee_sub(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status) +{ + fb.t.sign = !fb.t.sign; + return ieee_add(fa, fb, src, rnd, control, status); +} + +typedef struct { + u_int64_t lo; + u_int64_t hi; +} u_int128_t; + +#define SRL128(x, b) \ +do { \ + x.lo >>= b; \ + x.lo |= x.hi << (64 - b); \ + x.hi >>= b; \ +} while (0) + +#define SLL128(x, b) \ +do { \ + if (b >= 64) { \ + x.hi = x.lo << (b - 64); \ + x.lo = 0; \ + } else { \ + x.hi <<= b; \ + x.hi |= x.lo >> (64 - b); \ + x.lo <<= b; \ + } \ +} while (0) + +#define SUB128(a, b) \ +do { \ + int borrow = a.lo < b.lo; \ + a.lo = a.lo - b.lo; \ + a.hi = a.hi - b.hi - borrow; \ +} while (0) + +#define LESS128(a, b) (a.hi < b.hi 
|| (a.hi == b.hi && a.lo < b.lo)) + +fp_register_t +ieee_mul(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status) +{ + int shift; + int expa, expb, exp; + u_int64_t fraca, fracb, tmp; + u_int128_t frac; + int sign; + + /* First handle NaNs */ + if (isNaN(fa) || isNaN(fb)) { + fp_register_t result; + + /* Instructions Descriptions (I) section 4.7.10.4 */ + if (isQNaN(fb)) + result = fb; + else if (isSNaN(fb)) { + result = fb; + result.t.fraction |= QNAN_BIT; + } else if (isQNaN(fa)) + result = fa; + else if (isSNaN(fa)) + result = fa; + + /* If either operand is a signalling NaN, trap. */ + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + + return result; + } + + /* Handle INF and 0 */ + if ((isINF(fa) && isZERO(fb)) || (isINF(fa) && isZERO(fb))) { + /* INF * 0 = NaN */ + *status |= FPCR_INV; + return makeQNaN(); + } else + /* If either is INF or zero, get the sign right */ + if (isINF(fa) || isINF(fb)) + return makeINF(fa.t.sign ^ fb.t.sign); + else if (isZERO(fa) || isZERO(fb)) + return makeZERO(fa.t.sign ^ fb.t.sign); + + /* + * Unpack the registers. + */ + expa = getexp(fa, src); + expb = getexp(fb, src); + fraca = getfrac(fa); + fracb = getfrac(fb); + sign = fa.t.sign ^ fb.t.sign; + +#define LO32(x) ((x) & ((1ULL << 32) - 1)) +#define HI32(x) ((x) >> 32) + + /* + * Calculate the 128bit result of multiplying fraca and fracb. + */ + frac.lo = fraca * fracb; +#ifdef __alpha__ + /* + * The alpha has a handy instruction to find the high word. + */ + __asm__ __volatile__ ("umulh %1,%2,%0" + : "=r"(tmp) + : "r"(fraca), "r"(fracb)); + frac.hi = tmp; +#else + /* + * Do the multiply longhand otherwise. 
+ */ + frac.hi = HI32(LO32(fraca) * HI32(fracb) + + HI32(fraca) * LO32(fracb) + + HI32(LO32(fraca) * LO32(fracb))) + + HI32(fraca) * HI32(fracb); +#endif + exp = expa + expb - (T_FRACBITS + FRAC_SHIFT); + + while (frac.hi > 0) { + int sticky; + exp++; + sticky = frac.lo & 1; + SRL128(frac, 1); + frac.lo |= sticky; + } + + return makefloat(sign, exp, frac.lo, src, rnd, control, status); +} + +static u_int128_t +divide_128(u_int128_t a, u_int128_t b) +{ + u_int128_t result; + u_int64_t bit; + int i; + + /* + * Make a couple of assumptions on the numbers passed in. The + * value in 'a' will have bits set in the upper 64 bits only + * and the number in 'b' will have zeros in the upper 64 bits. + * Also, 'b' will not be zero. + */ +#ifdef TEST + if (a.hi == 0 || b.hi != 0 || b.lo == 0) + abort(); +#endif + + /* + * Find out how many bits of zeros are at the beginning of the divisor. + */ + i = 64; + bit = 1ULL << 63; + while (i < 127) { + if (b.lo & bit) + break; + i++; + bit >>= 1; + } + + /* + * Find out how much to shift the divisor so that its msb + * matches the msb of the dividend. + */ + bit = 1ULL << 63; + while (i) { + if (a.hi & bit) + break; + i--; + bit >>= 1; + } + + result.lo = 0; + result.hi = 0; + SLL128(b, i); + + /* + * Calculate the result in two parts to avoid keeping a 128bit + * value for the result bit. 
+ */ + if (i >= 64) { + bit = 1ULL << (i - 64); + while (bit) { + if (!LESS128(a, b)) { + result.hi |= bit; + SUB128(a, b); + if (!a.lo && !a.hi) + return result; + } + bit >>= 1; + SRL128(b, 1); + } + i = 63; + } + bit = 1ULL << i; + while (bit) { + if (!LESS128(a, b)) { + result.lo |= bit; + SUB128(a, b); + if (!a.lo && !a.hi) + return result; + } + bit >>= 1; + SRL128(b, 1); + } + + return result; +} + +fp_register_t +ieee_div(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status) +{ + int shift; + int expa, expb, exp; + u_int128_t fraca, fracb, frac; + int sign; + + /* First handle NaNs, INFs and ZEROs */ + if (isNaN(fa) || isNaN(fb)) { + fp_register_t result; + + /* Instructions Descriptions (I) section 4.7.10.4 */ + if (isQNaN(fb)) + result = fb; + else if (isSNaN(fb)) { + result = fb; + result.t.fraction |= QNAN_BIT; + } else if (isQNaN(fa)) + result = fa; + else if (isSNaN(fa)) + result = fa; + + /* If either operand is a signalling NaN, trap. */ + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + + return result; + } + + /* Handle INF and 0 */ + if (isINF(fa) && isINF(fb)) { + *status |= FPCR_INV; + return makeQNaN(); + } else if (isZERO(fb)) + if (isZERO(fa)) { + *status |= FPCR_INV; + return makeQNaN(); + } else { + *status |= FPCR_DZE; + return makeINF(fa.t.sign ^ fb.t.sign); + } + else if (isZERO(fa)) + return makeZERO(fa.t.sign ^ fb.t.sign); + + /* + * Unpack the registers. 
+ */ + expa = getexp(fa, src); + expb = getexp(fb, src); + fraca.hi = getfrac(fa); + fraca.lo = 0; + fracb.lo = getfrac(fb); + fracb.hi = 0; + sign = fa.t.sign ^ fb.t.sign; + + frac = divide_128(fraca, fracb); + + exp = expa - expb - (64 - T_FRACBITS - FRAC_SHIFT); + while (frac.hi > 0) { + int sticky; + exp++; + sticky = frac.lo & 1; + SRL128(frac, 1); + frac.lo |= sticky; + } + frac.lo |= 1; + + return makefloat(sign, exp, frac.lo, src, rnd, control, status); +} + +#define IEEE_TRUE 0x4000000000000000ULL +#define IEEE_FALSE 0 + +fp_register_t +ieee_cmpun(fp_register_t fa, fp_register_t fb, u_int64_t *status) +{ + fp_register_t result; + if (isNaN(fa) || isNaN(fb)) { + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + result.q = IEEE_TRUE; + } else + result.q = IEEE_FALSE; + + return result; +} + +fp_register_t +ieee_cmpeq(fp_register_t fa, fp_register_t fb, u_int64_t *status) +{ + fp_register_t result; + if (isNaN(fa) || isNaN(fb)) { + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + result.q = IEEE_FALSE; + } else { + if (isZERO(fa) && isZERO(fb)) + result.q = IEEE_TRUE; + else if (fa.q == fb.q) + result.q = IEEE_TRUE; + else + result.q = IEEE_FALSE; + } + + return result; +} + +fp_register_t +ieee_cmplt(fp_register_t fa, fp_register_t fb, u_int64_t *status) +{ + fp_register_t result; + if (isNaN(fa) || isNaN(fb)) { + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + result.q = IEEE_FALSE; + } else { + if (isZERO(fa) && isZERO(fb)) + result.q = IEEE_FALSE; + else if (fa.t.sign) { + /* fa is negative */ + if (!fb.t.sign) + /* fb is positive, return true */ + result.q = IEEE_TRUE; + else if (fa.t.exponent > fb.t.exponent) + /* fa has a larger exponent, return true */ + result.q = IEEE_TRUE; + else if (fa.t.exponent == fb.t.exponent + && fa.t.fraction > fb.t.fraction) + /* compare fractions */ + result.q = IEEE_TRUE; + else + result.q = IEEE_FALSE; + } else { + /* fa is positive */ + if (fb.t.sign) + /* fb is negative, return false */ + result.q = 
IEEE_FALSE; + else if (fb.t.exponent > fa.t.exponent) + /* fb has a larger exponent, return true */ + result.q = IEEE_TRUE; + else if (fb.t.exponent == fb.t.exponent + && fa.t.fraction < fb.t.fraction) + /* compare fractions */ + result.q = IEEE_TRUE; + else + result.q = IEEE_FALSE; + } + } + + return result; +} + +fp_register_t +ieee_cmple(fp_register_t fa, fp_register_t fb, u_int64_t *status) +{ + fp_register_t result; + if (isNaN(fa) || isNaN(fb)) { + if (isSNaN(fa) || isSNaN(fb)) + *status |= FPCR_INV; + result.q = IEEE_FALSE; + } else { + if (isZERO(fa) && isZERO(fb)) + result.q = IEEE_TRUE; + else if (fa.t.sign) { + /* fa is negative */ + if (!fb.t.sign) + /* fb is positive, return true */ + result.q = IEEE_TRUE; + else if (fa.t.exponent > fb.t.exponent) + /* fa has a larger exponent, return true */ + result.q = IEEE_TRUE; + else if (fa.t.exponent == fb.t.exponent + && fa.t.fraction >= fb.t.fraction) + /* compare fractions */ + result.q = IEEE_TRUE; + else + result.q = IEEE_FALSE; + } else { + /* fa is positive */ + if (fb.t.sign) + /* fb is negative, return false */ + result.q = IEEE_FALSE; + else if (fb.t.exponent > fa.t.exponent) + /* fb has a larger exponent, return true */ + result.q = IEEE_TRUE; + else if (fb.t.exponent == fb.t.exponent + && fa.t.fraction <= fb.t.fraction) + /* compare fractions */ + result.q = IEEE_TRUE; + else + result.q = IEEE_FALSE; + } + } + + return result; +} + +fp_register_t +ieee_convert_S_T(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + /* + * Handle exceptional values. + */ + if (isNaN(f)) { + /* Instructions Descriptions (I) section 4.7.10.1 */ + f.t.fraction |= QNAN_BIT; + *status |= FPCR_INV; + } + if (isQNaN(f) || isINF(f)) + return f; + + /* + * If the number is a denormalised float, renormalise. 
+ */ + if (isDENORM(f)) + return makefloat(f.t.sign, + getexp(f, S_FORMAT), + getfrac(f), + T_FORMAT, rnd, control, status); + else + return f; +} + +fp_register_t +ieee_convert_T_S(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + /* + * Handle exceptional values. + */ + if (isNaN(f)) { + /* Instructions Descriptions (I) section 4.7.10.1 */ + f.t.fraction |= QNAN_BIT; + f.t.fraction &= ~S_FRACMASK; + *status |= FPCR_INV; + } + if (isQNaN(f) || isINF(f)) + return f; + + return makefloat(f.t.sign, + getexp(f, T_FORMAT), + getfrac(f), + S_FORMAT, rnd, control, status); +} + +fp_register_t +ieee_convert_Q_S(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + u_int64_t frac = f.q; + int sign, exponent; + + if (frac >> 63) { + sign = 1; + frac = -frac; + } else + sign = 0; + + /* + * We shift up one bit to leave the sticky bit clear. This is + * possible unless frac == (1<<63), in which case the sticky + * bit is already clear. + */ + exponent = T_FRACBITS + FRAC_SHIFT; + if (frac < (1ULL << 63)) { + frac <<= 1; + exponent--; + } + + return makefloat(sign, exponent, frac, S_FORMAT, rnd, + control, status); +} + +fp_register_t +ieee_convert_Q_T(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + u_int64_t frac = f.q; + int sign, exponent; + + if (frac >> 63) { + sign = 1; + frac = -frac; + } else + sign = 0; + + /* + * We shift up one bit to leave the sticky bit clear. This is + * possible unless frac == (1<<63), in which case the sticky + * bit is already clear. + */ + exponent = T_FRACBITS + FRAC_SHIFT; + if (frac < (1ULL << 63)) { + frac <<= 1; + exponent--; + } + + return makefloat(sign, exponent, frac, T_FORMAT, rnd, + control, status); +} + +fp_register_t +ieee_convert_T_Q(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + u_int64_t frac; + int exp; + + /* + * Handle exceptional values. 
+ */ + if (isNaN(f)) { + /* Instructions Descriptions (I) section 4.7.10.1 */ + if (isSNaN(f)) + *status |= FPCR_INV; + f.q = 0; + return f; + } + if (isINF(f)) { + /* Instructions Descriptions (I) section 4.7.10.1 */ + *status |= FPCR_INV; + f.q = 0; + return f; + } + + exp = getexp(f, T_FORMAT) - (T_FRACBITS + FRAC_SHIFT); + frac = getfrac(f); + + if (exp > 0) { + if (exp > 64 || frac >= (1 << (64 - exp))) + *status |= FPCR_IOV | FPCR_INE; + if (exp < 64) + frac <<= exp; + else + frac = 0; + } else if (exp < 0) { + u_int64_t mask; + u_int64_t fraclo, frachi; + u_int64_t diffhi, difflo; + exp = -exp; + if (exp > 64) { + fraclo = 0; + diffhi = 0; + difflo = 0; + if (frac) { + frachi = 1; + *status |= FPCR_INE; + } else + frachi = 0; + } else if (exp == 64) { + fraclo = 0; + if (frac) { + frachi = 1; + difflo = frac; + diffhi = -frac; + *status |= FPCR_INE; + } else { + frachi = 0; + difflo = 0; + diffhi = 0; + } + } else { + mask = (1 << exp) - 1; + fraclo = frac >> exp; + if (frac & mask) { + frachi = fraclo + 1; + difflo = frac - (fraclo << exp); + diffhi = (frachi << exp) - frac; + *status |= FPCR_INE; + } else { + frachi = fraclo; + difflo = 0; + diffhi = 0; + } + } + switch (rnd) { + case ROUND_CHOP: + frac = fraclo; + break; + case ROUND_MINUS_INF: + if (f.t.sign) + frac = frachi; + else + frac = fraclo; + break; + case ROUND_NORMAL: +#if 0 + /* + * Round to nearest. + */ + if (difflo < diffhi) + frac = fraclo; + else if (diffhi > difflo) + frac = frachi; + else if (fraclo & 1) + frac = frachi; + else + frac = fraclo; +#else + /* + * Round to zero. 
+ */ + frac = fraclo; +#endif + break; + case ROUND_PLUS_INF: + if (f.t.sign) + frac = fraclo; + else + frac = frachi; + break; + } + } + + if (f.t.sign) { + if (frac > (1ULL << 63)) + *status |= FPCR_IOV | FPCR_INE; + frac = -frac; + } else { + if (frac > (1ULL << 63) - 1) + *status |= FPCR_IOV | FPCR_INE; + } + + f.q = frac; + return f; +} + +fp_register_t +ieee_convert_S_Q(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status) +{ + f = ieee_convert_S_T(f, rnd, control, status); + return ieee_convert_T_Q(f, rnd, control, status); +} + +#ifndef KERNEL + +#include +#include +#include + +union value { + double d; + fp_register_t r; +}; + + +static double +random_double() +{ + union value a; + int exp; + + a.r.t.fraction = ((long long)random() & (1ULL << 20) - 1) << 32 + | random(); + exp = random() & 0x7ff; +#if 1 + if (exp == 0) + exp = 1; /* no denorms */ + else if (exp == 0x7ff) + exp = 0x7fe; /* no NaNs and INFs */ +#endif + + a.r.t.exponent = exp; + a.r.t.sign = random() & 1; + return a.d; +} + +static float +random_float() +{ + union value a; + int exp; + + a.r.t.fraction = ((long)random() & (1ULL << 23) - 1) << 29; + exp = random() & 0xff; +#if 1 + if (exp == 0) + exp = 1; /* no denorms */ + else if (exp == 0xff) + exp = 0xfe; /* no NaNs and INFs */ +#endif + + /* map exponent from S to T format */ + if (exp == 255) + a.r.t.exponent = 0x7ff; + else if (exp & 0x80) + a.r.t.exponent = 0x400 + (exp & 0x7f); + else if (exp) + a.r.t.exponent = 0x380 + exp; + else + a.r.t.exponent = 0; + a.r.t.sign = random() & 1; + + return a.d; +} + +/* + * Ignore epsilon errors + */ +int +equal_T(union value a, union value b) +{ + if (isZERO(a.r) && isZERO(b.r)) + return 1; + if (a.r.t.sign != b.r.t.sign) + return 0; + if (a.r.t.exponent != b.r.t.exponent) + return 0; + + return a.r.t.fraction == b.r.t.fraction; +} + +int +equal_S(union value a, union value b) +{ + int64_t epsilon = 1ULL << 29; + + if (isZERO(a.r) && isZERO(b.r)) + return 1; + if (a.r.t.sign != 
b.r.t.sign) + return 0; + if (a.r.t.exponent != b.r.t.exponent) + return 0; + + return ((a.r.t.fraction & ~(epsilon-1)) + == (b.r.t.fraction & ~(epsilon-1))); +} + +#define ITER 1000000 + +static void +test_double_add() +{ + union value a, b, c, x; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + b.d = random_double(); + status = 0; + c.r = ieee_add(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN and INF */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + x.d = a.d + b.d; + if (!equal_T(c, x)) { + printf("bad double add, %g + %g = %g (should be %g)\n", + a.d, b.d, c.d, x.d); + c.r = ieee_add(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, &status); + } + } +} + +static void +test_single_add() +{ + union value a, b, c, x, t; + float xf; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { +#if 0 + if (i == 0) { + a.r.q = 0xb33acf292ca49700ULL; + b.r.q = 0xcad3191058a693aeULL; + } +#endif + a.d = random_float(); + b.d = random_float(); + status = 0; + c.r = ieee_add(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN and INF */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + xf = a.d + b.d; + x.d = xf; + t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status); + if (!equal_S(t, x)) { + printf("bad single add, %g + %g = %g (should be %g)\n", + a.d, b.d, t.d, x.d); + c.r = ieee_add(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); + } + } +} + +static void +test_double_mul() +{ + union value a, b, c, x; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + b.d = random_double(); + status = 0; + c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN and INF */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + x.d = a.d * b.d; + if (!equal_T(c, x)) { + printf("bad double mul, %g * %g = %g (should be %g)\n", + a.d, b.d, c.d, x.d); + c.r = ieee_mul(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, 
&status); + } + } +} + +static void +test_single_mul() +{ + union value a, b, c, x, t; + float xf; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + b.d = random_double(); + status = 0; + c.r = ieee_mul(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN, INF and denormal results */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + xf = a.d * b.d; + x.d = xf; + t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status); + if (!equal_S(t, x)) { + printf("bad single mul, %g * %g = %g (should be %g)\n", + a.d, b.d, t.d, x.d); + c.r = ieee_mul(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); /* repeats the failing call; result unused */ + } + } +} + +static void +test_double_div() +{ + union value a, b, c, x; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + b.d = random_double(); + status = 0; + c.r = ieee_div(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN, INF and denormal results */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + x.d = a.d / b.d; + if (!equal_T(c, x) && !isZERO(x.r)) { + printf("bad double div, %g / %g = %g (should be %g)\n", + a.d, b.d, c.d, x.d); + c.r = ieee_div(a.r, b.r, T_FORMAT, ROUND_NORMAL, + 0, &status); /* repeats the failing call; result unused */ + } + } +} + +static void +test_single_div() +{ + union value a, b, c, x, t; + float xf; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + b.d = random_double(); + status = 0; + c.r = ieee_div(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); + /* ignore NaN, INF and denormal results */ + if (isNaN(c.r) || isINF(c.r) || isDENORM(c.r)) + continue; + xf = a.d / b.d; + x.d = xf; + t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status); + if (!equal_S(t, x)) { + printf("bad single div, %g / %g = %g (should be %g)\n", + a.d, b.d, t.d, x.d); + c.r = ieee_div(a.r, b.r, S_FORMAT, ROUND_NORMAL, + 0, &status); /* repeats the failing call; result unused */ + } + } +} + +static void +test_convert_int_to_double() +{ + union value a, c, x; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.r.q = 
(u_int64_t)random() << 32 + | random(); + status = 0; + c.r = ieee_convert_Q_T(a.r, ROUND_NORMAL, 0, &status); + /* ignore NaN and INF */ + if (isNaN(c.r) || isINF(c.r)) + continue; + x.d = (double) a.r.q; + if (c.d != x.d) { + printf("bad convert double, (double)%qx = %g (should be %g)\n", + a.r.q, c.d, x.d); + c.r = ieee_convert_Q_T(a.r, ROUND_NORMAL, 0, &status); + } + } +} + +static void +test_convert_int_to_single() +{ + union value a, c, x, t; + float xf; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.r.q = (unsigned long long)random() << 32 + | random(); + status = 0; + c.r = ieee_convert_Q_S(a.r, ROUND_NORMAL, 0, &status); + /* ignore NaN and INF */ + if (isNaN(c.r) || isINF(c.r)) + continue; + xf = (float) a.r.q; + x.d = xf; + t.r = ieee_convert_S_T(c.r, ROUND_NORMAL, 0, &status); + if (t.d != x.d) { + printf("bad convert single, (float)%qx = %g (should be %g)\n", + a.r.q, t.d, x.d); /* report t, the value actually compared */ + c.r = ieee_convert_Q_S(a.r, ROUND_NORMAL, 0, &status); + } + } +} + +static void +test_convert_double_to_int() +{ + union value a, c; + u_int64_t status = 0; + int i; + + for (i = 0; i < ITER; i++) { + a.d = random_double(); + status = 0; + c.r = ieee_convert_T_Q(a.r, ROUND_NORMAL, 0, &status); + if ((int)c.r.q != (int)a.d) { + printf("bad convert double, (int)%g = %d (should be %d)\n", + a.d, (int)c.r.q, (int)a.d); + c.r = ieee_convert_T_Q(a.r, ROUND_NORMAL, 0, &status); + } + } +} + +int +main(int argc, char* argv[]) +{ + srandom(0); + + test_double_div(); + test_single_div(); + test_double_add(); + test_single_add(); + test_double_mul(); + test_single_mul(); + test_convert_int_to_double(); + test_convert_int_to_single(); +#if 0 + /* x86 generates SIGFPE on overflows. 
*/ + test_convert_double_to_int(); +#endif + + return 0; +} + +#endif diff --git a/sys/alpha/alpha/ieee_float.h b/sys/alpha/alpha/ieee_float.h new file mode 100644 index 000000000000..1977e910a0e1 --- /dev/null +++ b/sys/alpha/alpha/ieee_float.h @@ -0,0 +1,102 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id$ + */ + +#define S_FORMAT 0 /* IEEE single */ +#define T_FORMAT 2 /* IEEE double */ +#define Q_FORMAT 3 /* 64 bit fixed */ + +#define ROUND_CHOP 0 /* truncate fraction */ +#define ROUND_MINUS_INF 1 /* round to -INF */ +#define ROUND_NORMAL 2 /* round to nearest */ +#define ROUND_PLUS_INF 3 /* round to +INF */ + +typedef union fp_register { + struct { + u_int64_t fraction: 52; + u_int64_t exponent: 11; + u_int64_t sign: 1; + } t; + u_int64_t q; +} fp_register_t; + +fp_register_t +ieee_add(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_sub(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_mul(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_div(fp_register_t fa, fp_register_t fb, + int src, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_cmpun(fp_register_t fa, fp_register_t fb, u_int64_t *status); + +fp_register_t +ieee_cmpeq(fp_register_t fa, fp_register_t fb, u_int64_t *status); + +fp_register_t +ieee_cmplt(fp_register_t fa, fp_register_t fb, u_int64_t *status); + +fp_register_t +ieee_cmple(fp_register_t fa, fp_register_t fb, u_int64_t *status); + +fp_register_t +ieee_convert_S_T(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_convert_T_S(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_convert_Q_T(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_convert_Q_S(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_convert_T_Q(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + +fp_register_t +ieee_convert_S_Q(fp_register_t f, int rnd, + u_int64_t control, u_int64_t *status); + diff --git a/sys/alpha/alpha/machdep.c b/sys/alpha/alpha/machdep.c 
index f71ad55af073..3f1890ca28e0 100644 --- a/sys/alpha/alpha/machdep.c +++ b/sys/alpha/alpha/machdep.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: machdep.c,v 1.23 1998/11/18 23:51:40 dfr Exp $ + * $Id: machdep.c,v 1.24 1998/11/25 09:45:27 dfr Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -124,6 +124,7 @@ #include #include #include +#include #include #include #include @@ -1281,7 +1282,7 @@ sendsig(sig_t catcher, int sig, int mask, u_long code) ksc.sc_ownedfp = p->p_md.md_flags & MDP_FPUSED; bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)ksc.sc_fpregs, sizeof(struct fpreg)); - ksc.sc_fp_control = 0; /* XXX ? */ + ksc.sc_fp_control = p->p_addr->u_pcb.pcb_fp_control; bzero(ksc.sc_reserved, sizeof ksc.sc_reserved); /* XXX */ ksc.sc_xxx1[0] = 0; /* XXX */ ksc.sc_xxx1[1] = 0; /* XXX */ @@ -1393,7 +1394,7 @@ sigreturn(struct proc *p, fpcurproc = NULL; bcopy((struct fpreg *)ksc.sc_fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg)); - /* XXX ksc.sc_fp_control ? 
*/ + p->p_addr->u_pcb.pcb_fp_control = ksc.sc_fp_control; #ifdef DEBUG if (sigdebug & SDB_FOLLOW) @@ -1433,8 +1434,12 @@ setregs(struct proc *p, u_long entry, u_long stack) bzero(tfp->tf_regs, FRAME_SIZE * sizeof tfp->tf_regs[0]); bzero(&p->p_addr->u_pcb.pcb_fp, sizeof p->p_addr->u_pcb.pcb_fp); -#define FP_RN 2 /* XXX */ - p->p_addr->u_pcb.pcb_fp.fpr_cr = (long)FP_RN << 58; + p->p_addr->u_pcb.pcb_fp_control = (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + p->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL + | FPCR_INED | FPCR_UNFD); + alpha_pal_wrusp(stack); tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET; tfp->tf_regs[FRAME_PC] = entry & ~3; diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c index 2964ac0b9e5a..ffec15e76dbb 100644 --- a/sys/alpha/alpha/trap.c +++ b/sys/alpha/alpha/trap.c @@ -1,4 +1,4 @@ -/* $Id: trap.c,v 1.5 1998/07/15 20:16:27 dfr Exp $ */ +/* $Id: trap.c,v 1.6 1998/11/18 23:51:40 dfr Exp $ */ /* $NetBSD: trap.c,v 1.31 1998/03/26 02:21:46 thorpej Exp $ */ /* @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef DDB #include @@ -242,11 +243,13 @@ trap(a0, a1, a2, entry, framep) case ALPHA_KENTRY_ARITH: /* - * If user-land, just give a SIGFPE. Should do - * software completion and IEEE handling, if the - * user has requested that. + * If user-land, give a SIGFPE if software completion + * is not requested or if the completion fails. */ if (user) { + if (a0 & EXCSUM_SWC) + if (fp_software_completion(a1, p)) + goto out; i = SIGFPE; ucode = a0; /* exception summary */ break; @@ -546,7 +549,7 @@ syscall(code, framep) framep->tf_regs[FRAME_TRAPARG_A1] = 0; framep->tf_regs[FRAME_TRAPARG_A2] = 0; #if notdef /* can't happen, ever. 
*/ - if ((framep->tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) == 0) { + if ((framep->tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) == 0) panic("syscall"); #endif diff --git a/sys/alpha/alpha/vm_machdep.c b/sys/alpha/alpha/vm_machdep.c index 95a41d83feeb..600819c5e91c 100644 --- a/sys/alpha/alpha/vm_machdep.c +++ b/sys/alpha/alpha/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.3 1998/07/12 16:30:58 dfr Exp $ + * $Id: vm_machdep.c,v 1.4 1998/10/15 09:53:27 dfr Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. @@ -79,6 +79,7 @@ #include #include +#include #include #include @@ -146,6 +147,17 @@ cpu_fork(p1, p2) p2->p_addr->u_pcb = p1->p_addr->u_pcb; p2->p_addr->u_pcb.pcb_hw.apcb_usp = alpha_pal_rdusp(); + /* + * Set the floating point state. + */ + if ((p2->p_addr->u_pcb.pcb_fp_control & IEEE_INHERIT) == 0) { + p2->p_addr->u_pcb.pcb_fp_control = (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + p2->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL + | FPCR_INED | FPCR_UNFD); + } + /* * Arrange for a non-local goto when the new process * is started, to resume here, returning nonzero from setjmp. diff --git a/sys/alpha/conf/files.alpha b/sys/alpha/conf/files.alpha index e7b00d6fd90c..7230d4264329 100644 --- a/sys/alpha/conf/files.alpha +++ b/sys/alpha/conf/files.alpha @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.alpha,v 1.12 1998/11/08 18:39:57 nsouch Exp $ +# $Id: files.alpha,v 1.13 1998/11/15 18:15:06 dfr Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -43,7 +43,8 @@ alpha/alpha/in_cksum.c optional inet # now normal. 
# alpha/alpha/locore.s standard alpha/alpha/machdep.c standard -alpha/alpha/math_emulate.c optional math_emulate +alpha/alpha/fp_emulate.c standard +alpha/alpha/ieee_float.c standard alpha/alpha/mem.c standard alpha/alpha/mp_machdep.c optional smp alpha/alpha/perfmon.c optional perfmon profiling-routine diff --git a/sys/alpha/include/fpu.h b/sys/alpha/include/fpu.h new file mode 100644 index 000000000000..d4a767e66ebd --- /dev/null +++ b/sys/alpha/include/fpu.h @@ -0,0 +1,114 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _MACHINE_FPU_H_ +#define _MACHINE_FPU_H_ + +/* + * Floating point control register bits. 
+ * + * From Alpha AXP Architecture Reference Manual, Instruction + * Descriptions (I) PP 4-69. + */ + +#define FPCR_INVD (1LL << 49) /* Invalid Operation Disable */ +#define FPCR_DZED (1LL << 50) /* Division by Zero Disable */ +#define FPCR_OVFD (1LL << 51) /* Overflow Disable */ +#define FPCR_INV (1LL << 52) /* Invalid Operation */ +#define FPCR_DZE (1LL << 53) /* Division by Zero */ +#define FPCR_OVF (1LL << 54) /* Overflow */ +#define FPCR_UNF (1LL << 55) /* Underflow */ +#define FPCR_INE (1LL << 56) /* Inexact Result */ +#define FPCR_IOV (1LL << 57) /* Integer Overflow */ +#define FPCR_DYN_CHOPPED (0LL << 58) /* Chopped rounding mode */ +#define FPCR_DYN_MINUS (1LL << 58) /* Minus infinity */ +#define FPCR_DYN_NORMAL (2LL << 58) /* Normal rounding */ +#define FPCR_DYN_PLUS (3LL << 58) /* Plus infinity */ +#define FPCR_DYN_MASK (3LL << 58) /* Rounding mode mask */ +#define FPCR_DYN_SHIFT 58 +#define FPCR_UNDZ (1LL << 60) /* Underflow to Zero */ +#define FPCR_UNFD (1LL << 61) /* Underflow Disable */ +#define FPCR_INED (1LL << 62) /* Inexact Disable */ +#define FPCR_SUM (1ULL << 63) /* Summary Bit (unsigned: bit 63 overflows a signed shift) */ +#define FPCR_MASK (~0ULL << 49) /* bits 49-63 */ + +/* + * Exception summary bits. + * + * From Alpha AXP Architecture Reference Manual, DEC OSF/1 Exceptions + * and Interrupts (II-B) PP 5-5. + */ + +#define EXCSUM_SWC (1LL << 0) /* Software completion */ +#define EXCSUM_INV (1LL << 1) /* Invalid operation */ +#define EXCSUM_DZE (1LL << 2) /* Division by zero */ +#define EXCSUM_OVF (1LL << 3) /* Overflow */ +#define EXCSUM_UNF (1LL << 4) /* Underflow */ +#define EXCSUM_INE (1LL << 5) /* Inexact result */ +#define EXCSUM_IOV (1LL << 6) /* Integer overflow */ + +/* + * Definitions for IEEE trap enables. These are implemented in + * software and should be compatible with OSF/1 and Linux. 
+ */ + +/* read/write flags */ +#define IEEE_TRAP_ENABLE_INV (1LL << 1) /* Invalid operation */ +#define IEEE_TRAP_ENABLE_DZE (1LL << 2) /* Division by zero */ +#define IEEE_TRAP_ENABLE_OVF (1LL << 3) /* Overflow */ +#define IEEE_TRAP_ENABLE_UNF (1LL << 4) /* Underflow */ +#define IEEE_TRAP_ENABLE_INE (1LL << 5) /* Inexact result */ +#define IEEE_TRAP_ENABLE_MASK (IEEE_TRAP_ENABLE_INV \ + | IEEE_TRAP_ENABLE_DZE \ + | IEEE_TRAP_ENABLE_OVF \ + | IEEE_TRAP_ENABLE_UNF \ + | IEEE_TRAP_ENABLE_INE) + +/* read only flags */ +#define IEEE_STATUS_INV (1LL << 17) /* Invalid operation */ +#define IEEE_STATUS_DZE (1LL << 18) /* Division by zero */ +#define IEEE_STATUS_OVF (1LL << 19) /* Overflow */ +#define IEEE_STATUS_UNF (1LL << 20) /* Underflow */ +#define IEEE_STATUS_INE (1LL << 21) /* Inexact result */ +#define IEEE_STATUS_MASK (IEEE_STATUS_INV \ + | IEEE_STATUS_DZE \ + | IEEE_STATUS_OVF \ + | IEEE_STATUS_UNF \ + | IEEE_STATUS_INE) +#define IEEE_STATUS_TO_EXCSUM_SHIFT 16 /* convert to excsum */ +#define IEEE_STATUS_TO_FPCR_SHIFT 35 /* convert to fpcr */ + +#define IEEE_INHERIT (1ULL << 63) /* inherit on fork (unsigned: bit 63 overflows a signed shift) */ + +#ifdef KERNEL + +extern int fp_software_completion(u_int64_t regmask, struct proc *p); + +#endif + +#endif /* ! _MACHINE_FPU_H_ */ diff --git a/sys/alpha/include/inst.h b/sys/alpha/include/inst.h new file mode 100644 index 000000000000..27011848b175 --- /dev/null +++ b/sys/alpha/include/inst.h @@ -0,0 +1,462 @@ +/*- + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id$ + */ + +#ifndef _MACHINE_INST_H_ +#define _MACHINE_INST_H_ + +union alpha_instruction { + u_int32_t word; + struct { + u_int32_t argument : 26; + u_int32_t opcode : 6; +#define op_call_pal 0x00 +#define op_lda 0x08 +#define op_ldah 0x09 +#define op_ldbu 0x0a +#define op_unop 0x0b +#define op_ldq_u 0x0b +#define op_ldwu 0x0c +#define op_stw 0x0d +#define op_stb 0x0e +#define op_stq_u 0x0f +#define op_inta 0x10 +#define inta_addl 0x00 +#define inta_s4addl 0x02 +#define inta_subl 0x09 +#define inta_s4subl 0x0b +#define inta_cmpbge 0x0f +#define inta_s8addl 0x12 +#define inta_s8subl 0x1b +#define inta_cmpult 0x1d +#define inta_addq 0x20 +#define inta_s4addq 0x22 +#define inta_subq 0x29 +#define inta_s4subq 0x2b +#define inta_cmpeq 0x2d +#define inta_s8addq 0x32 +#define inta_s8subq 0x3b +#define inta_cmpule 0x3d +#define inta_addlv 0x40 +#define inta_sublv 0x49 +#define inta_cmplt 0x4d +#define inta_addqv 0x60 +#define inta_subqv 0x69 +#define inta_cmple 0x6d +#define op_intl 
0x11 +#define intl_and 0x00 +#define intl_andnot 0x08 +#define intl_bic 0x08 +#define intl_cmovlbs 0x14 +#define intl_cmovlbc 0x16 +#define intl_or 0x20 +#define intl_bis 0x20 +#define intl_cmoveq 0x24 +#define intl_cmovne 0x26 +#define intl_ornot 0x28 +#define intl_xor 0x40 +#define intl_cmovlt 0x44 +#define intl_cmovge 0x46 +#define intl_eqv 0x48 +#define intl_amask 0x61 +#define intl_cmovle 0x64 +#define intl_cmovgt 0x66 +#define intl_implver 0x6c +#define op_ints 0x12 +#define ints_mskbl 0x02 +#define ints_extbl 0x06 +#define ints_insbl 0x0b +#define ints_mskwl 0x12 +#define ints_extwl 0x16 +#define ints_inswl 0x1b +#define ints_mskll 0x22 +#define ints_extll 0x26 +#define ints_insll 0x2b +#define ints_zap 0x30 +#define ints_zapnot 0x31 +#define ints_mskql 0x32 +#define ints_srl 0x34 +#define ints_extql 0x36 +#define ints_sll 0x39 +#define ints_insql 0x3b +#define ints_sra 0x3c +#define ints_mskwh 0x52 +#define ints_inswh 0x57 +#define ints_extwh 0x5a +#define ints_msklh 0x62 +#define ints_inslh 0x67 +#define ints_extlh 0x6a +#define ints_mskqh 0x72 +#define ints_insqh 0x77 +#define ints_extqh 0x7a +#define op_intm 0x13 +#define intm_mull 0x00 +#define intm_mulq 0x20 +#define intm_umulh 0x30 +#define intm_mullv 0x40 +#define intm_mulqv 0x60 +#define op_opc14 0x14 +#define op_fltv 0x15 +#define op_flti 0x16 +#define flti_addsc 0x000 +#define flti_subsc 0x001 +#define flti_mulsc 0x002 +#define flti_divsc 0x003 +#define flti_addtc 0x020 +#define flti_subtc 0x021 +#define flti_multc 0x022 +#define flti_divtc 0x023 +#define flti_cvttsc 0x02c +#define flti_cvttqc 0x02f +#define flti_cvtqsc 0x03c +#define flti_cvtqtc 0x03e + +#define flti_addsm 0x040 +#define flti_subsm 0x041 +#define flti_mulsm 0x042 +#define flti_divsm 0x043 +#define flti_addtm 0x060 +#define flti_subtm 0x061 +#define flti_multm 0x062 +#define flti_divtm 0x063 +#define flti_cvttsm 0x06c +#define flti_cvttqm 0x06f +#define flti_cvtqsm 0x07c +#define flti_cvtqtm 0x07e + +#define flti_adds 0x080 
+#define flti_subs 0x081 +#define flti_muls 0x082 +#define flti_divs 0x083 + +#define flti_addt 0x0a0 +#define flti_subt 0x0a1 +#define flti_mult 0x0a2 +#define flti_divt 0x0a3 +#define flti_cmptun 0x0a4 +#define flti_cmpteq 0x0a5 +#define flti_cmptlt 0x0a6 +#define flti_cmptle 0x0a7 +#define flti_cvtts 0x0ac +#define flti_cvttq 0x0af +#define flti_cvtqs 0x0bc +#define flti_cvtqt 0x0be + +#define flti_addsd 0x0c0 +#define flti_subsd 0x0c1 +#define flti_mulsd 0x0c2 +#define flti_divsd 0x0c3 +#define flti_addtd 0x0e0 +#define flti_subtd 0x0e1 +#define flti_multd 0x0e2 +#define flti_divtd 0x0e3 +#define flti_cvttsd 0x0ec +#define flti_cvttqd 0x0ef +#define flti_cvtqsd 0x0fc +#define flti_cvtqtd 0x0fe + +#define flti_addsuc 0x100 +#define flti_subsuc 0x101 +#define flti_mulsuc 0x102 +#define flti_divsuc 0x103 +#define flti_addtuc 0x120 +#define flti_subtuc 0x121 +#define flti_multuc 0x122 +#define flti_divtuc 0x123 +#define flti_cvttsuc 0x12c +#define flti_cvttqvc 0x12f + +#define flti_addsum 0x140 +#define flti_subsum 0x141 +#define flti_mulsum 0x142 +#define flti_divsum 0x143 +#define flti_addtum 0x160 +#define flti_subtum 0x161 +#define flti_multum 0x162 +#define flti_divtum 0x163 +#define flti_cvttsum 0x16c +#define flti_cvttqvm 0x16f + +#define flti_addsu 0x180 +#define flti_subsu 0x181 +#define flti_mulsu 0x182 +#define flti_divsu 0x183 +#define flti_addtu 0x1a0 +#define flti_subtu 0x1a1 +#define flti_multu 0x1a2 +#define flti_divtu 0x1a3 +#define flti_cvttsu 0x1ac +#define flti_cvttqv 0x1af + +#define flti_addsud 0x1c0 +#define flti_subsud 0x1c1 +#define flti_mulsud 0x1c2 +#define flti_divsud 0x1c3 +#define flti_addtud 0x1e0 +#define flti_subtud 0x1e1 +#define flti_multud 0x1e2 +#define flti_divtud 0x1e3 +#define flti_cvttsud 0x1ec +#define flti_cvttqvd 0x1ef + +#define flti_cvtst 0x2ac + +#define flti_addssuc 0x500 +#define flti_subssuc 0x501 +#define flti_mulssuc 0x502 +#define flti_divssuc 0x503 +#define flti_addtsuc 0x520 +#define flti_subtsuc 0x521 +#define 
flti_multsuc 0x522 +#define flti_divtsuc 0x523 +#define flti_cvttssuc 0x52c +#define flti_cvttqsvc 0x52f + +#define flti_addssum 0x540 +#define flti_subssum 0x541 +#define flti_mulssum 0x542 +#define flti_divssum 0x543 +#define flti_addtsum 0x560 +#define flti_subtsum 0x561 +#define flti_multsum 0x562 +#define flti_divtsum 0x563 +#define flti_cvttssum 0x56c +#define flti_cvttqsvm 0x56f + +#define flti_addssu 0x580 +#define flti_subssu 0x581 +#define flti_mulssu 0x582 +#define flti_divssu 0x583 +#define flti_addtsu 0x5a0 +#define flti_subtsu 0x5a1 +#define flti_multsu 0x5a2 +#define flti_divtsu 0x5a3 +#define flti_cmptunsu 0x5a4 +#define flti_cmpteqsu 0x5a5 +#define flti_cmptltsu 0x5a6 +#define flti_cmptlesu 0x5a7 +#define flti_cvttssu 0x5ac +#define flti_cvttqsv 0x5af + +#define flti_addssud 0x5c0 +#define flti_subssud 0x5c1 +#define flti_mulssud 0x5c2 +#define flti_divssud 0x5c3 +#define flti_addtsud 0x5e0 +#define flti_subtsud 0x5e1 +#define flti_multsud 0x5e2 +#define flti_divtsud 0x5e3 +#define flti_cvttssud 0x5ec +#define flti_cvttqsvd 0x5ef + +#define flti_cvtsts 0x6ac + +#define flti_addssuic 0x700 +#define flti_subssuic 0x701 +#define flti_mulssuic 0x702 +#define flti_divssuic 0x703 +#define flti_addtsuic 0x720 +#define flti_subtsuic 0x721 +#define flti_multsuic 0x722 +#define flti_divtsuic 0x723 +#define flti_cvttssuic 0x72c +#define flti_cvttqsvic 0x72f +#define flti_cvtqssuic 0x73c +#define flti_cvtqtsuic 0x73e + +#define flti_addssuim 0x740 +#define flti_subssuim 0x741 +#define flti_mulssuim 0x742 +#define flti_divssuim 0x743 +#define flti_addtsuim 0x760 +#define flti_subtsuim 0x761 +#define flti_multsuim 0x762 +#define flti_divtsuim 0x763 +#define flti_cvttssuim 0x76c +#define flti_cvttqsvim 0x76f +#define flti_cvtqssuim 0x77c +#define flti_cvtqtsuim 0x77e + +#define flti_addssui 0x780 +#define flti_subssui 0x781 +#define flti_mulssui 0x782 +#define flti_divssui 0x783 +#define flti_addtsui 0x7a0 +#define flti_subtsui 0x7a1 +#define flti_multsui 0x7a2 
+#define flti_divtsui 0x7a3 +#define flti_cmptunsui 0x7a4 +#define flti_cmpteqsui 0x7a5 +#define flti_cmptltsui 0x7a6 +#define flti_cmptlesui 0x7a7 +#define flti_cvttssui 0x7ac +#define flti_cvttqsvi 0x7af +#define flti_cvtqssui 0x7bc +#define flti_cvtqtsui 0x7be + +#define flti_addssuid 0x7c0 +#define flti_subssuid 0x7c1 +#define flti_mulssuid 0x7c2 +#define flti_divssuid 0x7c3 +#define flti_addtsuid 0x7e0 +#define flti_subtsuid 0x7e1 +#define flti_multsuid 0x7e2 +#define flti_divtsuid 0x7e3 +#define flti_cvttssuid 0x7ec +#define flti_cvttqsvid 0x7ef +#define flti_cvtqssuid 0x7fc +#define flti_cvtqtsuid 0x7fe + +#define op_fltl 0x17 +#define fltl_cvtlq 0x010 +#define fltl_cpys 0x020 +#define fltl_cpysn 0x021 +#define fltl_cpyse 0x022 +#define fltl_mt_fpcr 0x024 +#define fltl_mf_fpcr 0x025 +#define fltl_fcmoveq 0x02a +#define fltl_fcmovne 0x02b +#define fltl_fcmovlt 0x02c +#define fltl_fcmovge 0x02d +#define fltl_fcmovle 0x02e +#define fltl_fcmovgt 0x02f +#define fltl_cvtql 0x030 +#define fltl_cvtqlv 0x130 +#define fltl_cvtqlsv 0x530 + +#define op_misc 0x18 +#define misc_trapb 0x0000 +#define misc_excb 0x0400 +#define misc_mb 0x4000 +#define misc_wmb 0x4400 +#define misc_fetch 0x8000 +#define misc_fetch_m 0xa000 +#define misc_rpcc 0xc000 +#define misc_rc 0xe000 +#define misc_ecb 0xe800 +#define misc_rs 0xf000 +#define misc_wh64 0xf800 + +#define op_pal19 0x19 +#define op_jsr 0x1a +#define op_pal1b 0x1b +#define op_pal1c 0x1c +#define op_pal1d 0x1d +#define op_pal1e 0x1e +#define op_pal1f 0x1f +#define op_ldf 0x20 +#define op_ldg 0x21 +#define op_lds 0x22 +#define op_ldt 0x23 +#define op_stf 0x24 +#define op_stg 0x25 +#define op_sts 0x26 +#define op_stt 0x27 +#define op_ldl 0x28 +#define op_ldq 0x29 +#define op_ldl_l 0x2a +#define op_ldq_l 0x2b +#define op_stl 0x2c +#define op_stq 0x2d +#define op_stl_c 0x2e +#define op_stq_c 0x2f +#define op_br 0x30 +#define op_fbeq 0x31 +#define op_fblt 0x32 +#define op_fble 0x33 +#define op_bsr 0x34 +#define op_fbne 0x35 +#define 
op_fbge 0x36 +#define op_fbgt 0x37 +#define op_blbc 0x38 +#define op_beq 0x39 +#define op_blt 0x3a +#define op_ble 0x3b +#define op_blbs 0x3c +#define op_bne 0x3d +#define op_bge 0x3e +#define op_bgt 0x3f + } common; + struct { + u_int32_t function : 16; + u_int32_t rb : 5; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } memory_format; + struct { + u_int32_t hint : 14; + u_int32_t function : 2; +#define jsr_jmp 0 +#define jsr_jsr 1 +#define jsr_ret 2 +#define jsr_jsr_coroutine 3 + u_int32_t rb : 5; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } j_format; + struct { + int32_t memory_displacement : 16; + u_int32_t rb : 5; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } m_format; + struct { + u_int32_t rc : 5; + u_int32_t function : 7; + u_int32_t form : 1; + u_int32_t sbz : 3; + u_int32_t rb : 5; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } o_format; + struct { + u_int32_t rc : 5; + u_int32_t function : 7; + u_int32_t form : 1; + u_int32_t literal : 8; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } l_format; + struct { + u_int32_t fc : 5; + u_int32_t function : 11; + u_int32_t fb : 5; + u_int32_t fa : 5; + u_int32_t opcode : 6; + } f_format; + struct { + u_int32_t function : 26; + u_int32_t opcode : 6; + } pal_format; + struct { + int32_t branch_displacement : 21; + u_int32_t ra : 5; + u_int32_t opcode : 6; + } b_format; +}; + +#endif /* _MACHINE_INST_H_ */ diff --git a/sys/alpha/include/pcb.h b/sys/alpha/include/pcb.h index 445617ea71dc..a6d4a18bde20 100644 --- a/sys/alpha/include/pcb.h +++ b/sys/alpha/include/pcb.h @@ -1,4 +1,4 @@ -/* $Id$ */ +/* $Id: pcb.h,v 1.1.1.1 1998/03/09 05:43:16 jb Exp $ */ /* From: NetBSD: pcb.h,v 1.6 1997/04/06 08:47:33 cgd Exp */ /* @@ -50,6 +50,7 @@ struct pcb { struct alpha_pcb pcb_hw; /* PALcode defined */ unsigned long pcb_context[9]; /* s[0-6], ra, ps [SW] */ struct fpreg pcb_fp; /* FP registers [SW] */ + u_int64_t pcb_fp_control; /* IEEE control word [SW] */ unsigned long pcb_onfault; /* for copy faults [SW] */ unsigned long 
pcb_accessaddr; /* for [fs]uswintr [SW] */ }; diff --git a/sys/conf/files.alpha b/sys/conf/files.alpha index e7b00d6fd90c..7230d4264329 100644 --- a/sys/conf/files.alpha +++ b/sys/conf/files.alpha @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.alpha,v 1.12 1998/11/08 18:39:57 nsouch Exp $ +# $Id: files.alpha,v 1.13 1998/11/15 18:15:06 dfr Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -43,7 +43,8 @@ alpha/alpha/in_cksum.c optional inet # now normal. # alpha/alpha/locore.s standard alpha/alpha/machdep.c standard -alpha/alpha/math_emulate.c optional math_emulate +alpha/alpha/fp_emulate.c standard +alpha/alpha/ieee_float.c standard alpha/alpha/mem.c standard alpha/alpha/mp_machdep.c optional smp alpha/alpha/perfmon.c optional perfmon profiling-routine diff --git a/sys/powerpc/aim/vm_machdep.c b/sys/powerpc/aim/vm_machdep.c index 95a41d83feeb..600819c5e91c 100644 --- a/sys/powerpc/aim/vm_machdep.c +++ b/sys/powerpc/aim/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.3 1998/07/12 16:30:58 dfr Exp $ + * $Id: vm_machdep.c,v 1.4 1998/10/15 09:53:27 dfr Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. @@ -79,6 +79,7 @@ #include #include +#include #include #include @@ -146,6 +147,17 @@ cpu_fork(p1, p2) p2->p_addr->u_pcb = p1->p_addr->u_pcb; p2->p_addr->u_pcb.pcb_hw.apcb_usp = alpha_pal_rdusp(); + /* + * Set the floating point state. 
+ */ + if ((p2->p_addr->u_pcb.pcb_fp_control & IEEE_INHERIT) == 0) { + p2->p_addr->u_pcb.pcb_fp_control = (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + p2->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL + | FPCR_INED | FPCR_UNFD); + } + /* * Arrange for a non-local goto when the new process * is started, to resume here, returning nonzero from setjmp. diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c index 95a41d83feeb..600819c5e91c 100644 --- a/sys/powerpc/powerpc/vm_machdep.c +++ b/sys/powerpc/powerpc/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.3 1998/07/12 16:30:58 dfr Exp $ + * $Id: vm_machdep.c,v 1.4 1998/10/15 09:53:27 dfr Exp $ */ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. @@ -79,6 +79,7 @@ #include #include +#include #include #include @@ -146,6 +147,17 @@ cpu_fork(p1, p2) p2->p_addr->u_pcb = p1->p_addr->u_pcb; p2->p_addr->u_pcb.pcb_hw.apcb_usp = alpha_pal_rdusp(); + /* + * Set the floating point state. + */ + if ((p2->p_addr->u_pcb.pcb_fp_control & IEEE_INHERIT) == 0) { + p2->p_addr->u_pcb.pcb_fp_control = (IEEE_TRAP_ENABLE_INV + | IEEE_TRAP_ENABLE_DZE + | IEEE_TRAP_ENABLE_OVF); + p2->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL + | FPCR_INED | FPCR_UNFD); + } + /* * Arrange for a non-local goto when the new process * is started, to resume here, returning nonzero from setjmp.