powerpcspe: Fix GPR handling in SPE exception handler

Optimize the exception handler to only save and load the upper word of the
GPRs used in the emulating instruction.  This reduces the save/load
overhead, and as a side effect does not overwrite the upper word of any
temporary register.

With this commit I am now able to run editors/abiword and math/gnumeric on a
e500-based system.

MFC after:	1 week
MFC With:	r341752,r341751
This commit is contained in:
Justin Hibbits 2018-12-13 04:48:28 +00:00
parent ff2038a9bf
commit 3067a880ce
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=342025

View File

@ -66,7 +66,6 @@ save_vec_int(struct thread *td)
*/
msr = mfmsr();
mtmsr(msr | PSL_VEC);
isync();
/*
* Save the vector registers and SPEFSCR to the PCB
@ -419,6 +418,52 @@ spe_explode(struct fpemu *fe, struct fpn *fp, uint32_t type,
return (0);
}
/*
* Save the high word of a 64-bit GPR for manipulation in the exception handler.
*/
static uint32_t
spe_save_reg_high(int reg)
{
uint32_t vec[2];
#define EVSTDW(n) case n: __asm ("evstdw %1,0(%0)" \
:: "b"(vec), "n"(n)); break;
switch (reg) {
EVSTDW(0); EVSTDW(1); EVSTDW(2); EVSTDW(3);
EVSTDW(4); EVSTDW(5); EVSTDW(6); EVSTDW(7);
EVSTDW(8); EVSTDW(9); EVSTDW(10); EVSTDW(11);
EVSTDW(12); EVSTDW(13); EVSTDW(14); EVSTDW(15);
EVSTDW(16); EVSTDW(17); EVSTDW(18); EVSTDW(19);
EVSTDW(20); EVSTDW(21); EVSTDW(22); EVSTDW(23);
EVSTDW(24); EVSTDW(25); EVSTDW(26); EVSTDW(27);
EVSTDW(28); EVSTDW(29); EVSTDW(30); EVSTDW(31);
}
#undef EVSTDW
return (vec[0]);
}
/*
* Load the given value into the high word of the requested register.
*/
static void
spe_load_reg_high(int reg, uint32_t val)
{
#define EVLDW(n) case n: __asm __volatile("evmergelo "#n",%0,0," \
:: "r"(val)); break;
switch (reg) {
EVLDW(1); EVLDW(2); EVLDW(3); EVLDW(4);
EVLDW(5); EVLDW(6); EVLDW(7); EVLDW(8);
EVLDW(9); EVLDW(10); EVLDW(11); EVLDW(12);
EVLDW(13); EVLDW(14); EVLDW(15); EVLDW(16);
EVLDW(17); EVLDW(18); EVLDW(19); EVLDW(20);
EVLDW(21); EVLDW(22); EVLDW(23); EVLDW(24);
EVLDW(25); EVLDW(26); EVLDW(27); EVLDW(28);
EVLDW(29); EVLDW(30); EVLDW(31); EVLDW(0);
}
#undef EVLDW
}
void
spe_handle_fpdata(struct trapframe *frame)
{
@ -426,11 +471,12 @@ spe_handle_fpdata(struct trapframe *frame)
struct fpn *result;
uint32_t instr, instr_sec_op;
uint32_t cr_shift, ra, rb, rd, src;
uint32_t high, low, res; /* For vector operations. */
uint32_t high, low, res, tmp; /* For vector operations. */
uint32_t spefscr = 0;
uint32_t ftod_res[2];
int width; /* Single, Double, Vector, Integer */
int err;
uint32_t msr;
err = fueword32((void *)frame->srr0, &instr);
@ -441,6 +487,7 @@ spe_handle_fpdata(struct trapframe *frame)
if ((instr >> OPC_SHIFT) != SPE_OPC)
return;
msr = mfmsr();
/*
* 'cr' field is the upper 3 bits of rd. Magically, since a) rd is 5
* bits, b) each 'cr' field is 4 bits, and c) Only the 'GT' bit is
@ -460,31 +507,34 @@ spe_handle_fpdata(struct trapframe *frame)
width = NONE;
switch (src) {
case SPE:
save_vec_nodrop(curthread);
mtmsr(msr | PSL_VEC);
switch (instr_sec_op) {
case EVFSABS:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31);
high = spe_save_reg_high(ra) & ~(1U << 31);
frame->fixreg[rd] = frame->fixreg[ra] & ~(1U << 31);
spe_load_reg_high(rd, high);
break;
case EVFSNABS:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31);
high = spe_save_reg_high(ra) | (1U << 31);
frame->fixreg[rd] = frame->fixreg[ra] | (1U << 31);
spe_load_reg_high(rd, high);
break;
case EVFSNEG:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31);
high = spe_save_reg_high(ra) ^ (1U << 31);
frame->fixreg[rd] = frame->fixreg[ra] ^ (1U << 31);
spe_load_reg_high(rd, high);
break;
default:
/* High word */
spe_explode(&fpemu, &fpemu.fe_f1, SINGLE,
curthread->td_pcb->pcb_vec.vr[ra][0], 0);
spe_save_reg_high(ra), 0);
spe_explode(&fpemu, &fpemu.fe_f2, SINGLE,
curthread->td_pcb->pcb_vec.vr[rb][0], 0);
spe_save_reg_high(rb), 0);
high = spe_emu_instr(instr_sec_op, &fpemu, &result,
&curthread->td_pcb->pcb_vec.vr[rd][0]);
&tmp);
if (high < 0)
spe_load_reg_high(rd, tmp);
spefscr = fpscr_to_spefscr(fpemu.fe_cx) << 16;
/* Clear the fpemu to start over on the lower bits. */
@ -508,7 +558,6 @@ spe_handle_fpdata(struct trapframe *frame)
width = VECTOR;
break;
}
enable_vec(curthread);
goto end;
case SPFP:
@ -524,8 +573,7 @@ spe_handle_fpdata(struct trapframe *frame)
break;
case EFSCFD:
spe_explode(&fpemu, &fpemu.fe_f3, DOUBLE,
curthread->td_pcb->pcb_vec.vr[rb][0],
frame->fixreg[rb]);
spe_save_reg_high(rb), frame->fixreg[rb]);
result = &fpemu.fe_f3;
width = SINGLE;
break;
@ -538,25 +586,22 @@ spe_handle_fpdata(struct trapframe *frame)
}
break;
case DPFP:
save_vec_nodrop(curthread);
mtmsr(msr | PSL_VEC);
switch (instr_sec_op) {
case EFDABS:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] & ~(1U << 31);
high = spe_save_reg_high(ra) & ~(1U << 31);
frame->fixreg[rd] = frame->fixreg[ra];
enable_vec(curthread);
spe_load_reg_high(rd, high);
break;
case EFDNABS:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] | (1U << 31);
high = spe_save_reg_high(ra) | (1U << 31);
frame->fixreg[rd] = frame->fixreg[ra];
enable_vec(curthread);
spe_load_reg_high(rd, high);
break;
case EFDNEG:
curthread->td_pcb->pcb_vec.vr[rd][0] =
curthread->td_pcb->pcb_vec.vr[ra][0] ^ (1U << 31);
high = spe_save_reg_high(ra) ^ (1U << 31);
frame->fixreg[rd] = frame->fixreg[ra];
enable_vec(curthread);
spe_load_reg_high(rd, high);
break;
case EFDCFS:
spe_explode(&fpemu, &fpemu.fe_f3, SINGLE,
@ -566,11 +611,9 @@ spe_handle_fpdata(struct trapframe *frame)
break;
default:
spe_explode(&fpemu, &fpemu.fe_f1, DOUBLE,
curthread->td_pcb->pcb_vec.vr[ra][0],
frame->fixreg[ra]);
spe_save_reg_high(ra), frame->fixreg[ra]);
spe_explode(&fpemu, &fpemu.fe_f2, DOUBLE,
curthread->td_pcb->pcb_vec.vr[rb][0],
frame->fixreg[rb]);
spe_save_reg_high(rb), frame->fixreg[rb]);
width = DOUBLE;
}
break;
@ -609,10 +652,8 @@ spe_handle_fpdata(struct trapframe *frame)
frame->fixreg[rd] = fpu_ftos(&fpemu, result);
break;
case DOUBLE:
curthread->td_pcb->pcb_vec.vr[rd][0] =
fpu_ftod(&fpemu, result, ftod_res);
spe_load_reg_high(rd, fpu_ftod(&fpemu, result, ftod_res));
frame->fixreg[rd] = ftod_res[1];
enable_vec(curthread);
break;
default:
panic("Unknown storage width %d", width);
@ -624,6 +665,7 @@ spe_handle_fpdata(struct trapframe *frame)
spefscr |= (mfspr(SPR_SPEFSCR) & ~SPEFSCR_FINVS);
mtspr(SPR_SPEFSCR, spefscr);
frame->srr0 += 4;
mtmsr(msr);
return;
}