Enable the FPU on first use per-thread and save state across context
switches. Not as lazy as it could be. Changing FPU state with sigcontext
is still TODO.

fpu.c - convert some asm to inline C, and macroize fpu loads/stores
swtch.S - call out to save/restore fpu routines
trap.c - always call enable_fpu on an FPU trap, since that trap shouldn't
         be taken once the FPU has been enabled for a thread
genassym.c - define for pcb fpu flag
This commit is contained in:
grehan 2003-03-20 10:28:20 +00:00
parent 99d692e01f
commit 68029e94a1
6 changed files with 101 additions and 117 deletions

View File

@ -88,12 +88,11 @@ ENTRY(cpu_switch)
stw %r16,PCB_USR(%r3)
stw %r1,PCB_SP(%r3) /* Save the stack pointer */
#if 0
lwz %r29,PCB_FLAGS(%r3)
andi. %r9, %r29, 1 /* XXX - don't hard code */
lwz %r5,PCB_FLAGS(%r3) /* Save FPU context if needed */
andi. %r5, %r5, PCB_FPU
beq .L1
mr %r3,%r15
bl save_fpu
#endif
.L1:
bl choosethread /* Find a new thread to run */
@ -111,16 +110,14 @@ ENTRY(cpu_switch)
mfsprg %r4,0 /* Get the pcpu pointer */
stw %r16,PC_CURTHREAD(%r4) /* Store new current thread */
mr %r3,%r16 /* and save in r3 */
lwz %r16,TD_PCB(%r16) /* Store new current PCB */
stw %r16,PC_CURPCB(%r4)
lwz %r17,TD_PCB(%r16) /* Store new current PCB */
stw %r17,PC_CURPCB(%r4)
#if 0
lwz %r29, PCB_FLAGS(%r4) /* Restore FPU regs if needed */
andi. %r9, %r29, 1
lwz %r5, PCB_FLAGS(%r17) /* Restore FPU context if needed */
andi. %r5, %r5, PCB_FPU
beq .L2
mr %r3, %r4
bl enable_fpu
#endif
mr %r3,%r16 /* Recover new curthread */
/* thread to restore is in r3 */
.L2:

View File

@ -150,7 +150,7 @@ trapname(u_int vector)
void
trap(struct trapframe *frame)
{
struct thread *td, *fputhread;
struct thread *td;
struct proc *p;
int sig, type, user;
u_int sticks, ucode;
@ -192,13 +192,9 @@ trap(struct trapframe *frame)
break;
case EXC_FPU:
if ((fputhread = PCPU_GET(fputhread)) != NULL) {
save_fpu(fputhread);
}
PCPU_SET(fputhread, td);
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
("FPU already enabled for thread"));
enable_fpu(td);
frame->srr1 |= PSL_FP;
break;
#ifdef ALTIVEC
@ -253,10 +249,6 @@ trap(struct trapframe *frame)
}
}
if (td != PCPU_GET(fputhread) ||
td->td_pcb->pcb_fpcpu != PCPU_GET(cpuid))
frame->srr1 &= ~PSL_FP;
#ifdef ALTIVEC
if (td != PCPU_GET(vecthread) ||
td->td_pcb->pcb_veccpu != PCPU_GET(cpuid))

View File

@ -47,101 +47,106 @@ static const char rcsid[] =
void
enable_fpu(struct thread *td)
{
int msr, scratch;
int msr;
struct pcb *pcb;
struct trapframe *tf;
pcb = td->td_pcb;
tf = trapframe(td);
/*
* Save the thread's FPU CPU number, and set the CPU's current
* FPU thread
*/
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
PCPU_SET(fputhread, td);
/*
* Enable the FPU for when the thread returns from the exception.
* If this is the first time the FPU has been used by the thread,
* initialise the FPU registers and FPSCR to 0, and set the flag
* to indicate that the FPU is in use.
*/
tf->srr1 |= PSL_FP;
if (!(pcb->pcb_flags & PCB_FPU)) {
memset(&pcb->pcb_fpu, 0, sizeof pcb->pcb_fpu);
pcb->pcb_flags |= PCB_FPU;
}
__asm __volatile ("mfmsr %0; ori %1,%0,%2; mtmsr %1; isync"
: "=r"(msr), "=r"(scratch) : "K"(PSL_FP));
/*
* Temporarily enable floating-point so the registers
* can be restored.
*/
msr = mfmsr();
mtmsr(msr | PSL_FP);
isync();
/*
* Load the floating point registers and FPSCR from the PCB.
* (A value of 0xff for mtfsf specifies that all 8 4-bit fields
* of the saved FPSCR are to be loaded from the FPU reg).
*/
__asm __volatile ("lfd 0,0(%0); mtfsf 0xff,0"
:: "b"(&pcb->pcb_fpu.fpscr));
__asm ("lfd 0,0(%0);"
"lfd 1,8(%0);"
"lfd 2,16(%0);"
"lfd 3,24(%0);"
"lfd 4,32(%0);"
"lfd 5,40(%0);"
"lfd 6,48(%0);"
"lfd 7,56(%0);"
"lfd 8,64(%0);"
"lfd 9,72(%0);"
"lfd 10,80(%0);"
"lfd 11,88(%0);"
"lfd 12,96(%0);"
"lfd 13,104(%0);"
"lfd 14,112(%0);"
"lfd 15,120(%0);"
"lfd 16,128(%0);"
"lfd 17,136(%0);"
"lfd 18,144(%0);"
"lfd 19,152(%0);"
"lfd 20,160(%0);"
"lfd 21,168(%0);"
"lfd 22,176(%0);"
"lfd 23,184(%0);"
"lfd 24,192(%0);"
"lfd 25,200(%0);"
"lfd 26,208(%0);"
"lfd 27,216(%0);"
"lfd 28,224(%0);"
"lfd 29,232(%0);"
"lfd 30,240(%0);"
"lfd 31,248(%0)" :: "b"(&pcb->pcb_fpu.fpr[0]));
__asm __volatile ("mtmsr %0; isync" :: "r"(msr));
#define LFP(n) __asm ("lfd " #n ", 0(%0)" \
:: "b"(&pcb->pcb_fpu.fpr[n]));
LFP(0); LFP(1); LFP(2); LFP(3);
LFP(4); LFP(5); LFP(6); LFP(7);
LFP(8); LFP(9); LFP(10); LFP(11);
LFP(12); LFP(13); LFP(14); LFP(15);
LFP(16); LFP(17); LFP(18); LFP(19);
LFP(20); LFP(21); LFP(22); LFP(23);
LFP(24); LFP(25); LFP(26); LFP(27);
LFP(28); LFP(29); LFP(30); LFP(31);
#undef LFP
isync();
mtmsr(msr);
}
void
save_fpu(struct thread *td)
{
int msr, scratch;
int msr;
struct pcb *pcb;
pcb = td->td_pcb;
__asm __volatile ("mfmsr %0; ori %1,%0,%2; mtmsr %1; isync"
: "=r"(msr), "=r"(scratch) : "K"(PSL_FP));
__asm ("stfd 0,0(%0);"
"stfd 1,8(%0);"
"stfd 2,16(%0);"
"stfd 3,24(%0);"
"stfd 4,32(%0);"
"stfd 5,40(%0);"
"stfd 6,48(%0);"
"stfd 7,56(%0);"
"stfd 8,64(%0);"
"stfd 9,72(%0);"
"stfd 10,80(%0);"
"stfd 11,88(%0);"
"stfd 12,96(%0);"
"stfd 13,104(%0);"
"stfd 14,112(%0);"
"stfd 15,120(%0);"
"stfd 16,128(%0);"
"stfd 17,136(%0);"
"stfd 18,144(%0);"
"stfd 19,152(%0);"
"stfd 20,160(%0);"
"stfd 21,168(%0);"
"stfd 22,176(%0);"
"stfd 23,184(%0);"
"stfd 24,192(%0);"
"stfd 25,200(%0);"
"stfd 26,208(%0);"
"stfd 27,216(%0);"
"stfd 28,224(%0);"
"stfd 29,232(%0);"
"stfd 30,240(%0);"
"stfd 31,248(%0)" :: "b"(&pcb->pcb_fpu.fpr[0]));
/*
* Temporarily re-enable floating-point during the save
*/
msr = mfmsr();
mtmsr(msr | PSL_FP);
isync();
/*
* Save the floating-point registers and FPSCR to the PCB
*/
#define SFP(n) __asm ("stfd " #n ", 0(%0)" \
:: "b"(&pcb->pcb_fpu.fpr[n]));
SFP(0); SFP(1); SFP(2); SFP(3);
SFP(4); SFP(5); SFP(6); SFP(7);
SFP(8); SFP(9); SFP(10); SFP(11);
SFP(12); SFP(13); SFP(14); SFP(15);
SFP(16); SFP(17); SFP(18); SFP(19);
SFP(20); SFP(21); SFP(22); SFP(23);
SFP(24); SFP(25); SFP(26); SFP(27);
SFP(28); SFP(29); SFP(30); SFP(31);
#undef SFP
__asm __volatile ("mffs 0; stfd 0,0(%0)" :: "b"(&pcb->pcb_fpu.fpscr));
__asm __volatile ("mtmsr %0; isync" :: "r"(msr));
/*
* Disable floating-point again
*/
isync();
mtmsr(msr);
/*
* Clear the current fp thread and pcb's CPU id
* XXX should this be left clear to allow lazy save/restore ?
*/
pcb->pcb_fpcpu = NULL;
PCPU_SET(fputhread, NULL);
}

View File

@ -137,6 +137,7 @@ ASSYM(PCB_LR, offsetof(struct pcb, pcb_lr));
ASSYM(PCB_USR, offsetof(struct pcb, pcb_usr));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_FPU, PCB_FPU);
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));

View File

@ -88,12 +88,11 @@ ENTRY(cpu_switch)
stw %r16,PCB_USR(%r3)
stw %r1,PCB_SP(%r3) /* Save the stack pointer */
#if 0
lwz %r29,PCB_FLAGS(%r3)
andi. %r9, %r29, 1 /* XXX - don't hard code */
lwz %r5,PCB_FLAGS(%r3) /* Save FPU context if needed */
andi. %r5, %r5, PCB_FPU
beq .L1
mr %r3,%r15
bl save_fpu
#endif
.L1:
bl choosethread /* Find a new thread to run */
@ -111,16 +110,14 @@ ENTRY(cpu_switch)
mfsprg %r4,0 /* Get the pcpu pointer */
stw %r16,PC_CURTHREAD(%r4) /* Store new current thread */
mr %r3,%r16 /* and save in r3 */
lwz %r16,TD_PCB(%r16) /* Store new current PCB */
stw %r16,PC_CURPCB(%r4)
lwz %r17,TD_PCB(%r16) /* Store new current PCB */
stw %r17,PC_CURPCB(%r4)
#if 0
lwz %r29, PCB_FLAGS(%r4) /* Restore FPU regs if needed */
andi. %r9, %r29, 1
lwz %r5, PCB_FLAGS(%r17) /* Restore FPU context if needed */
andi. %r5, %r5, PCB_FPU
beq .L2
mr %r3, %r4
bl enable_fpu
#endif
mr %r3,%r16 /* Recover new curthread */
/* thread to restore is in r3 */
.L2:

View File

@ -150,7 +150,7 @@ trapname(u_int vector)
void
trap(struct trapframe *frame)
{
struct thread *td, *fputhread;
struct thread *td;
struct proc *p;
int sig, type, user;
u_int sticks, ucode;
@ -192,13 +192,9 @@ trap(struct trapframe *frame)
break;
case EXC_FPU:
if ((fputhread = PCPU_GET(fputhread)) != NULL) {
save_fpu(fputhread);
}
PCPU_SET(fputhread, td);
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
("FPU already enabled for thread"));
enable_fpu(td);
frame->srr1 |= PSL_FP;
break;
#ifdef ALTIVEC
@ -253,10 +249,6 @@ trap(struct trapframe *frame)
}
}
if (td != PCPU_GET(fputhread) ||
td->td_pcb->pcb_fpcpu != PCPU_GET(cpuid))
frame->srr1 &= ~PSL_FP;
#ifdef ALTIVEC
if (td != PCPU_GET(vecthread) ||
td->td_pcb->pcb_veccpu != PCPU_GET(cpuid))