Add a support macro to convert the 5-bit packed register field of
a floating point instruction into a 6-bit register number for
double and quad arguments.
Make use of the new INSFPdq_RN macro where appropriate; this
is required for correctly handling the "high" fp registers
(>= %f32).
Fix a number of bugs related to the handling of the high registers
which were caused by using __fpu_[gs]etreg() where __fpu_[gs]etreg64()
should be used (the former can only access the low, single-precision,
registers).

Submitted by:	tmm
Jake Burkholder 2002-05-11 21:20:05 +00:00
parent ab4448f3e4
commit 025c284b8d
4 changed files with 93 additions and 66 deletions
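To make the new register-number decoding concrete, here is a small standalone sketch built around the INSFPdq_RN and RN_DECODE macros added in the diffs below. The FTYPE_DBL/FTYPE_EXT values (2 and 3) and the printf harness are assumptions for illustration only, inferred from "type = IF_F3_OPF(insn) & 3" and opmask[] = {0, 0, 1, 3}; they are not part of the commit.

#include <stdio.h>

/* Copied from the INSFPdq_RN definition added below. */
#define	INSFPdq_RN(rn)	(((rn) & ~1) | (((rn) & 1) << 5))

/* Assumed type values: INT=0, SNG=1, DBL=2, EXT=3 (as implied by opf & 3). */
#define	FTYPE_DBL	2
#define	FTYPE_EXT	3

static int opmask[] = {0, 0, 1, 3};

/* Copied from the RN_DECODE definition added below. */
#define	RN_DECODE(tp, rn) \
	((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \
	~opmask[tp])

int
main(void)
{
	/* %f32 is packed as 0b00001 in the 5-bit field of a double op. */
	printf("%d\n", RN_DECODE(FTYPE_DBL, 1));	/* prints 32 */
	/* %f34 is packed as 0b00011. */
	printf("%d\n", RN_DECODE(FTYPE_DBL, 3));	/* prints 34 */
	/* Registers below %f32 keep their natural numbering. */
	printf("%d\n", RN_DECODE(FTYPE_DBL, 2));	/* prints 2 */
	return (0);
}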


@@ -186,24 +186,62 @@ __fpu_dumpfpn(struct fpn *fp)
 }
 #endif
 
+static int opmask[] = {0, 0, 1, 3};
+
+/* Decode 5 bit register field depending on the type. */
+#define	RN_DECODE(tp, rn) \
+	((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \
+	~opmask[tp])
+
+/* Operand size in 32-bit registers. */
+#define	OPSZ(tp)	((tp) == FTYPE_LNG ? 2 : (1 << (tp)))
+
+/*
+ * Helper for forming the below case statements. Build only the op3 and opf
+ * field of the instruction, these are the only ones that need to match.
+ */
+#define	FOP(op3, opf) \
+	((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
+
+/*
+ * Implement a move operation for all supported operand types. The additional
+ * nand and xor parameters will be applied to the upper 32 bit word of the
+ * source operand. This allows to implement fabs and fneg (for fp operands
+ * only!) using this functions, too, by passing (1 << 31) for one of the
+ * parameters, and 0 for the other.
+ */
 static void
-__fpu_mov(struct fpemu *fe, int type, int rd, int rs1, int rs2)
+__fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
+    u_int32_t xor)
 {
+	u_int64_t tmp64;
+	u_int32_t *p32;
 	int i;
 
-	i = 1 << type;
-	__fpu_setreg(rd++, rs1);
-	while (--i)
-		__fpu_setreg(rd++, __fpu_getreg(++rs2));
+	if (type == FTYPE_INT || type == FTYPE_SNG)
+		__fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
+	else {
+		/*
+		 * Need to use the double versions to be able to access
+		 * the upper 32 fp registers.
+		 */
+		for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) {
+			tmp64 = __fpu_getreg64(rs2);
+			if (i == 0)
+				tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^
+				    ((u_int64_t)xor << 32);
+			__fpu_setreg64(rd, tmp64);
+		}
+	}
 }
 
 static __inline void
-__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs1, int rs2,
+__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
     u_int32_t insn, int fcc)
 {
 
 	if (IF_F4_COND(insn) == fcc)
-		__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+		__fpu_mov(fe, type, rd, rs2, 0, 0);
 }
 
 static int
@@ -230,15 +268,6 @@ __fpu_cmpck(struct fpemu *fe)
 	return (0);
 }
 
-static int opmask[] = {0, 0, 1, 3};
-
-/*
- * Helper for forming the below case statements. Build only the op3 and opf
- * field of the instruction, these are the only that need to match.
- */
-#define	FOP(op3, opf) \
-	((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
-
 /*
  * Execute an FPU instruction (one that runs entirely in the FPU; not
  * FBfcc or STF, for instance). On return, fe->fe_fs->fs_fsr will be
@@ -254,6 +283,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 	int opf, rs1, rs2, rd, type, mask, cx, cond;
 	u_long reg, fsr;
 	u_int space[4];
+	int i;
 
 	/*
 	 * `Decode' and execute instruction. Start with no exceptions.
@@ -263,13 +293,12 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 	opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
 	    IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
 	type = IF_F3_OPF(insn) & 3;
-	mask = opmask[type];
-	rs1 = IF_F3_RS1(insn) & ~mask;
-	rs2 = IF_F3_RS2(insn) & ~mask;
-	rd = IF_F3_RD(insn) & ~mask;
+	rs1 = RN_DECODE(type, IF_F3_RS1(insn));
+	rs2 = RN_DECODE(type, IF_F3_RS2(insn));
+	rd = RN_DECODE(type, IF_F3_RD(insn));
 	cond = 0;
 #ifdef notdef
-	if ((rs1 | rs2 | rd) & mask)
+	if ((rs1 | rs2 | rd) & opmask[type])
 		return (SIGILL);
 #endif
 	fsr = fe->fe_fsr;
@@ -277,58 +306,54 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 	fe->fe_cx = 0;
 	switch (opf) {
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
-		    FSR_GET_FCC0(fsr));
+		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
-		    FSR_GET_FCC1(fsr));
+		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
-		    FSR_GET_FCC2(fsr));
+		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
-		    FSR_GET_FCC3(fsr));
+		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
+		__fpu_ccmov(fe, type, rd, rs2, insn,
 		    (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
-		__fpu_ccmov(fe, type, rd, __fpu_getreg(rs2), rs2, insn,
+		__fpu_ccmov(fe, type, rd, rs2, insn,
 		    (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg == 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg <= 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg < 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg != 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg > 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
 		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
 		if (reg >= 0)
-			__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+			__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop2, INSFP2_FCMP):
 		__fpu_explode(fe, &fe->fe_f1, type, rs1);
@@ -341,13 +366,13 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 		__fpu_compare(fe, 1, IF_F3_CC(insn));
 		return (__fpu_cmpck(fe));
 	case FOP(INS2_FPop1, INSFP1_FMOV):	/* these should all be pretty obvious */
-		__fpu_mov(fe, type, rd, __fpu_getreg(rs2), rs2);
+		__fpu_mov(fe, type, rd, rs2, 0, 0);
 		return (0);
 	case FOP(INS2_FPop1, INSFP1_FNEG):
-		__fpu_mov(fe, type, rd, __fpu_getreg(rs2) ^ (1 << 31), rs2);
+		__fpu_mov(fe, type, rd, rs2, 0, (1 << 31));
 		return (0);
 	case FOP(INS2_FPop1, INSFP1_FABS):
-		__fpu_mov(fe, type, rd, __fpu_getreg(rs2) & ~(1 << 31), rs2);
+		__fpu_mov(fe, type, rd, rs2, (1 << 31), 0);
 		return (0);
 	case FOP(INS2_FPop1, INSFP1_FSQRT):
 		__fpu_explode(fe, &fe->fe_f1, type, rs2);
@@ -384,8 +409,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 		 * Recalculate rd (the old type applied for the source regs
 		 * only, the target one has a different size).
 		 */
-		mask = opmask[type];
-		rd = IF_F3_RD(insn) & ~mask;
+		rd = RN_DECODE(type, IF_F3_RD(insn));
 		fp = __fpu_mul(fe);
 		break;
 	case FOP(INS2_FPop1, INSFP1_FxTOs):
@@ -395,8 +419,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 		/* sneaky; depends on instruction encoding */
 		type = (IF_F3_OPF(insn) >> 2) & 3;
-		mask = opmask[type];
-		rd = IF_F3_RD(insn) & ~mask;
+		rd = RN_DECODE(type, IF_F3_RD(insn));
 		break;
 	case FOP(INS2_FPop1, INSFP1_FTOx):
 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
@@ -411,8 +434,7 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
 		/* sneaky; depends on instruction encoding */
 		type = (IF_F3_OPF(insn) >> 2) & 3;
-		mask = opmask[type];
-		rd = IF_F3_RD(insn) & ~mask;
+		rd = RN_DECODE(type, IF_F3_RD(insn));
 		break;
 	default:
 		return (SIGILL);
@@ -438,12 +460,12 @@ __fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long ts
 		fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
 	}
 	fe->fe_fsr = fsr;
-	__fpu_setreg(rd, space[0]);
-	if (type >= FTYPE_DBL || type == FTYPE_LNG) {
-		__fpu_setreg(rd + 1, space[1]);
-		if (type > FTYPE_DBL) {
-			__fpu_setreg(rd + 2, space[2]);
-			__fpu_setreg(rd + 3, space[3]);
+	if (type == FTYPE_INT || type == FTYPE_SNG)
+		__fpu_setreg(rd, space[0]);
+	else {
+		for (i = 0; i < OPSZ(type); i += 2) {
+			__fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) |
+			    space[i + 1]);
 		}
 	}
 	return (0);	/* success */
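The nand/xor trick in the reworked __fpu_mov() is worth spelling out: for double and quad operands the sign bit sits in the most significant word of the first 64-bit register image, so FNEG passes xor = 1 << 31, FABS passes nand = 1 << 31, and a plain FMOV passes 0 for both. A minimal sketch of the masking applied to the first 64-bit chunk; the function name and the use of <stdint.h> types are illustrative, not part of the commit:

#include <stdint.h>

/* Mirror of the masking __fpu_mov() applies on its first iteration. */
static uint64_t
mov_first_word(uint64_t src, uint32_t nand, uint32_t xor)
{
	/* Clear the nand bits in the upper word, then flip the xor bits. */
	return ((src & ~((uint64_t)nand << 32)) ^ ((uint64_t)xor << 32));
}

/*
 * mov_first_word(x, 0, 0)        == x                            (FMOV)
 * mov_first_word(x, 0, 1U << 31) == x with the sign bit flipped  (FNEG)
 * mov_first_word(x, 1U << 31, 0) == x with the sign bit cleared  (FABS)
 */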


@@ -248,16 +248,21 @@ __fpu_explode(fe, fp, type, reg)
 	struct fpn *fp;
 	int type, reg;
 {
-	u_int s;
-	u_int64_t l;
+	u_int32_t s, *sp;
+	u_int64_t l[2];
 
-	l = __fpu_getreg64(reg & ~1);
-	s = __fpu_getreg(reg);
-	fp->fp_sign = s >> 31;
+	if (type == FTYPE_LNG || type == FTYPE_DBL || type == FTYPE_EXT) {
+		l[0] = __fpu_getreg64(reg & ~1);
+		sp = (u_int32_t *)l;
+		fp->fp_sign = sp[0] >> 31;
+	} else {
+		s = __fpu_getreg(reg);
+		fp->fp_sign = s >> 31;
+	}
 	fp->fp_sticky = 0;
 	switch (type) {
 
 	case FTYPE_LNG:
-		s = __fpu_xtof(fp, l);
+		s = __fpu_xtof(fp, l[0]);
 		break;
 
@@ -269,13 +274,12 @@ __fpu_explode(fe, fp, type, reg)
 		break;
 
 	case FTYPE_DBL:
-		s = __fpu_dtof(fp, s, __fpu_getreg(reg + 1));
+		s = __fpu_dtof(fp, sp[0], sp[1]);
 		break;
 
 	case FTYPE_EXT:
-		s = __fpu_qtof(fp, s, __fpu_getreg(reg + 1),
-		    __fpu_getreg(reg + 2),
-		    __fpu_getreg(reg + 3));
+		l[1] = __fpu_getreg64((reg & ~1) + 2);
+		s = __fpu_qtof(fp, sp[0], sp[1], sp[2], sp[3]);
 		break;
 
 	default:
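Why sp[0] now carries the sign bit: __fpu_getreg64() returns a full even/odd register pair, and sparc64 is big-endian, so viewing that 64-bit image through a u_int32_t pointer puts the most significant word (sign and exponent) at index 0 and the low word at index 1. A small endian-explicit sketch of the same split; the helper name is illustrative only:

#include <stdint.h>

/* Split a 64-bit register image the way sp[0]/sp[1] see it on sparc64. */
static void
split_regimage(uint64_t img, uint32_t words[2])
{
	words[0] = (uint32_t)(img >> 32);	/* high word: sign, exponent */
	words[1] = (uint32_t)img;		/* low word: rest of mantissa */
	/* fp->fp_sign corresponds to words[0] >> 31. */
}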


@@ -70,15 +70,13 @@ __emul_insn(struct utrapframe *uf)
 	case IOP_LDST:
 		switch (IF_F3_OP3(insn)) {
 		case INS3_LDQF:
-			rd = IF_F3_RD(insn);
-			rd = (rd & ~3) | ((rd & 1) << 5);
+			rd = INSFPdq_RN(IF_F3_RD(insn));
 			addr = (u_long *)__emul_f3_memop_addr(uf, insn);
 			__fpu_setreg64(rd, addr[0]);
 			__fpu_setreg64(rd + 2, addr[1]);
 			break;
 		case INS3_STQF:
-			rd = IF_F3_RD(insn);
-			rd = (rd & ~3) | ((rd & 1) << 5);
+			rd = INSFPdq_RN(IF_F3_RD(insn));
 			addr = (u_long *)__emul_f3_memop_addr(uf, insn);
 			addr[0] = __fpu_getreg64(rd);
 			addr[1] = __fpu_getreg64(rd + 2);
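Replacing the open-coded quad decoding with INSFPdq_RN() is behavior-preserving for every validly encoded quad register field, since quad registers (%f0, %f4, ..., %f60) always encode with bit 1 of the 5-bit field clear; the macro merely drops the redundant "& ~3" masking. A quick self-contained check, with a hypothetical test harness that is not part of the commit:

#include <assert.h>

#define	INSFPdq_RN(rn)	(((rn) & ~1) | (((rn) & 1) << 5))

int
main(void)
{
	int rn;

	/* Valid quad encodings have bit 1 of the 5-bit field clear. */
	for (rn = 0; rn < 32; rn++) {
		if ((rn & 2) != 0)
			continue;
		assert(((rn & ~3) | ((rn & 1) << 5)) == INSFPdq_RN(rn));
	}
	return (0);
}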


@@ -459,6 +459,9 @@
 #define	INSFP2_FCMP		0x050	/* s, d, q */
 #define	INSFP2_FCMPE		0x054	/* s, d, q */
 
+/* Decode 5-bit register field into 6-bit number (for doubles and quads). */
+#define	INSFPdq_RN(rn)		(((rn) & ~1) | (((rn) & 1) << 5))
+
 /* IMPLDEP1 for Sun UltraSparc */
 #define	IIDP1_EDGE8		0x00
 #define	IIDP1_EDGE8L		0x02