Enable the FPU on first use per-thread and save state across context
switches. Not as lazy as it could be. Changing FPU state with sigcontext
is still TODO.

fpu.c      - convert some asm to inline C, and macroize the FPU loads/stores
swtch.S    - call out to the save/restore FPU routines
trap.c     - always call enable_fpu, since the FPU trap handler shouldn't be
             reached once the FPU has been enabled for a thread
genassym.c - add a define for the PCB FPU flag (PCB_FPU)
This commit is contained in:
parent
99d692e01f
commit
68029e94a1
@ -88,12 +88,11 @@ ENTRY(cpu_switch)
|
||||
stw %r16,PCB_USR(%r3)
|
||||
stw %r1,PCB_SP(%r3) /* Save the stack pointer */
|
||||
|
||||
#if 0
|
||||
lwz %r29,PCB_FLAGS(%r3)
|
||||
andi. %r9, %r29, 1 /* XXX - don't hard code */
|
||||
lwz %r5,PCB_FLAGS(%r3) /* Save FPU context if needed */
|
||||
andi. %r5, %r5, PCB_FPU
|
||||
beq .L1
|
||||
mr %r3,%r15
|
||||
bl save_fpu
|
||||
#endif
|
||||
|
||||
.L1:
|
||||
bl choosethread /* Find a new thread to run */
|
||||
@ -111,16 +110,14 @@ ENTRY(cpu_switch)
|
||||
mfsprg %r4,0 /* Get the pcpu pointer */
|
||||
stw %r16,PC_CURTHREAD(%r4) /* Store new current thread */
|
||||
mr %r3,%r16 /* and save in r3 */
|
||||
lwz %r16,TD_PCB(%r16) /* Store new current PCB */
|
||||
stw %r16,PC_CURPCB(%r4)
|
||||
lwz %r17,TD_PCB(%r16) /* Store new current PCB */
|
||||
stw %r17,PC_CURPCB(%r4)
|
||||
|
||||
#if 0
|
||||
lwz %r29, PCB_FLAGS(%r4) /* Restore FPU regs if needed */
|
||||
andi. %r9, %r29, 1
|
||||
lwz %r5, PCB_FLAGS(%r17) /* Restore FPU context if needed */
|
||||
andi. %r5, %r5, PCB_FPU
|
||||
beq .L2
|
||||
mr %r3, %r4
|
||||
bl enable_fpu
|
||||
#endif
|
||||
mr %r3,%r16 /* Recover new curthread */
|
||||
|
||||
/* thread to restore is in r3 */
|
||||
.L2:
|
||||
|
@ -150,7 +150,7 @@ trapname(u_int vector)
|
||||
void
|
||||
trap(struct trapframe *frame)
|
||||
{
|
||||
struct thread *td, *fputhread;
|
||||
struct thread *td;
|
||||
struct proc *p;
|
||||
int sig, type, user;
|
||||
u_int sticks, ucode;
|
||||
@ -192,13 +192,9 @@ trap(struct trapframe *frame)
|
||||
break;
|
||||
|
||||
case EXC_FPU:
|
||||
if ((fputhread = PCPU_GET(fputhread)) != NULL) {
|
||||
save_fpu(fputhread);
|
||||
}
|
||||
PCPU_SET(fputhread, td);
|
||||
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
|
||||
KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
|
||||
("FPU already enabled for thread"));
|
||||
enable_fpu(td);
|
||||
frame->srr1 |= PSL_FP;
|
||||
break;
|
||||
|
||||
#ifdef ALTIVEC
|
||||
@ -253,10 +249,6 @@ trap(struct trapframe *frame)
|
||||
}
|
||||
}
|
||||
|
||||
if (td != PCPU_GET(fputhread) ||
|
||||
td->td_pcb->pcb_fpcpu != PCPU_GET(cpuid))
|
||||
frame->srr1 &= ~PSL_FP;
|
||||
|
||||
#ifdef ALTIVEC
|
||||
if (td != PCPU_GET(vecthread) ||
|
||||
td->td_pcb->pcb_veccpu != PCPU_GET(cpuid))
|
||||
|
@ -47,101 +47,106 @@ static const char rcsid[] =
|
||||
void
|
||||
enable_fpu(struct thread *td)
|
||||
{
|
||||
int msr, scratch;
|
||||
int msr;
|
||||
struct pcb *pcb;
|
||||
struct trapframe *tf;
|
||||
|
||||
pcb = td->td_pcb;
|
||||
tf = trapframe(td);
|
||||
|
||||
|
||||
/*
|
||||
* Save the thread's FPU CPU number, and set the CPU's current
|
||||
* FPU thread
|
||||
*/
|
||||
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
|
||||
PCPU_SET(fputhread, td);
|
||||
|
||||
/*
|
||||
* Enable the FPU for when the thread returns from the exception.
|
||||
* If this is the first time the FPU has been used by the thread,
|
||||
* initialise the FPU registers and FPSCR to 0, and set the flag
|
||||
* to indicate that the FPU is in use.
|
||||
*/
|
||||
tf->srr1 |= PSL_FP;
|
||||
if (!(pcb->pcb_flags & PCB_FPU)) {
|
||||
memset(&pcb->pcb_fpu, 0, sizeof pcb->pcb_fpu);
|
||||
pcb->pcb_flags |= PCB_FPU;
|
||||
}
|
||||
__asm __volatile ("mfmsr %0; ori %1,%0,%2; mtmsr %1; isync"
|
||||
: "=r"(msr), "=r"(scratch) : "K"(PSL_FP));
|
||||
|
||||
/*
|
||||
* Temporarily enable floating-point so the registers
|
||||
* can be restored.
|
||||
*/
|
||||
msr = mfmsr();
|
||||
mtmsr(msr | PSL_FP);
|
||||
isync();
|
||||
|
||||
/*
|
||||
* Load the floating point registers and FPSCR from the PCB.
|
||||
* (A value of 0xff for mtfsf specifies that all 8 4-bit fields
|
||||
* of the saved FPSCR are to be loaded from the FPU reg).
|
||||
*/
|
||||
__asm __volatile ("lfd 0,0(%0); mtfsf 0xff,0"
|
||||
:: "b"(&pcb->pcb_fpu.fpscr));
|
||||
__asm ("lfd 0,0(%0);"
|
||||
"lfd 1,8(%0);"
|
||||
"lfd 2,16(%0);"
|
||||
"lfd 3,24(%0);"
|
||||
"lfd 4,32(%0);"
|
||||
"lfd 5,40(%0);"
|
||||
"lfd 6,48(%0);"
|
||||
"lfd 7,56(%0);"
|
||||
"lfd 8,64(%0);"
|
||||
"lfd 9,72(%0);"
|
||||
"lfd 10,80(%0);"
|
||||
"lfd 11,88(%0);"
|
||||
"lfd 12,96(%0);"
|
||||
"lfd 13,104(%0);"
|
||||
"lfd 14,112(%0);"
|
||||
"lfd 15,120(%0);"
|
||||
"lfd 16,128(%0);"
|
||||
"lfd 17,136(%0);"
|
||||
"lfd 18,144(%0);"
|
||||
"lfd 19,152(%0);"
|
||||
"lfd 20,160(%0);"
|
||||
"lfd 21,168(%0);"
|
||||
"lfd 22,176(%0);"
|
||||
"lfd 23,184(%0);"
|
||||
"lfd 24,192(%0);"
|
||||
"lfd 25,200(%0);"
|
||||
"lfd 26,208(%0);"
|
||||
"lfd 27,216(%0);"
|
||||
"lfd 28,224(%0);"
|
||||
"lfd 29,232(%0);"
|
||||
"lfd 30,240(%0);"
|
||||
"lfd 31,248(%0)" :: "b"(&pcb->pcb_fpu.fpr[0]));
|
||||
__asm __volatile ("mtmsr %0; isync" :: "r"(msr));
|
||||
|
||||
#define LFP(n) __asm ("lfd " #n ", 0(%0)" \
|
||||
:: "b"(&pcb->pcb_fpu.fpr[n]));
|
||||
LFP(0); LFP(1); LFP(2); LFP(3);
|
||||
LFP(4); LFP(5); LFP(6); LFP(7);
|
||||
LFP(8); LFP(9); LFP(10); LFP(11);
|
||||
LFP(12); LFP(13); LFP(14); LFP(15);
|
||||
LFP(16); LFP(17); LFP(18); LFP(19);
|
||||
LFP(20); LFP(21); LFP(22); LFP(23);
|
||||
LFP(24); LFP(25); LFP(26); LFP(27);
|
||||
LFP(28); LFP(29); LFP(30); LFP(31);
|
||||
#undef LFP
|
||||
|
||||
isync();
|
||||
mtmsr(msr);
|
||||
}
|
||||
|
||||
void
|
||||
save_fpu(struct thread *td)
|
||||
{
|
||||
int msr, scratch;
|
||||
int msr;
|
||||
struct pcb *pcb;
|
||||
|
||||
pcb = td->td_pcb;
|
||||
|
||||
__asm __volatile ("mfmsr %0; ori %1,%0,%2; mtmsr %1; isync"
|
||||
: "=r"(msr), "=r"(scratch) : "K"(PSL_FP));
|
||||
__asm ("stfd 0,0(%0);"
|
||||
"stfd 1,8(%0);"
|
||||
"stfd 2,16(%0);"
|
||||
"stfd 3,24(%0);"
|
||||
"stfd 4,32(%0);"
|
||||
"stfd 5,40(%0);"
|
||||
"stfd 6,48(%0);"
|
||||
"stfd 7,56(%0);"
|
||||
"stfd 8,64(%0);"
|
||||
"stfd 9,72(%0);"
|
||||
"stfd 10,80(%0);"
|
||||
"stfd 11,88(%0);"
|
||||
"stfd 12,96(%0);"
|
||||
"stfd 13,104(%0);"
|
||||
"stfd 14,112(%0);"
|
||||
"stfd 15,120(%0);"
|
||||
"stfd 16,128(%0);"
|
||||
"stfd 17,136(%0);"
|
||||
"stfd 18,144(%0);"
|
||||
"stfd 19,152(%0);"
|
||||
"stfd 20,160(%0);"
|
||||
"stfd 21,168(%0);"
|
||||
"stfd 22,176(%0);"
|
||||
"stfd 23,184(%0);"
|
||||
"stfd 24,192(%0);"
|
||||
"stfd 25,200(%0);"
|
||||
"stfd 26,208(%0);"
|
||||
"stfd 27,216(%0);"
|
||||
"stfd 28,224(%0);"
|
||||
"stfd 29,232(%0);"
|
||||
"stfd 30,240(%0);"
|
||||
"stfd 31,248(%0)" :: "b"(&pcb->pcb_fpu.fpr[0]));
|
||||
/*
|
||||
* Temporarily re-enable floating-point during the save
|
||||
*/
|
||||
msr = mfmsr();
|
||||
mtmsr(msr | PSL_FP);
|
||||
isync();
|
||||
|
||||
/*
|
||||
* Save the floating-point registers and FPSCR to the PCB
|
||||
*/
|
||||
#define SFP(n) __asm ("stfd " #n ", 0(%0)" \
|
||||
:: "b"(&pcb->pcb_fpu.fpr[n]));
|
||||
SFP(0); SFP(1); SFP(2); SFP(3);
|
||||
SFP(4); SFP(5); SFP(6); SFP(7);
|
||||
SFP(8); SFP(9); SFP(10); SFP(11);
|
||||
SFP(12); SFP(13); SFP(14); SFP(15);
|
||||
SFP(16); SFP(17); SFP(18); SFP(19);
|
||||
SFP(20); SFP(21); SFP(22); SFP(23);
|
||||
SFP(24); SFP(25); SFP(26); SFP(27);
|
||||
SFP(28); SFP(29); SFP(30); SFP(31);
|
||||
#undef SFP
|
||||
__asm __volatile ("mffs 0; stfd 0,0(%0)" :: "b"(&pcb->pcb_fpu.fpscr));
|
||||
__asm __volatile ("mtmsr %0; isync" :: "r"(msr));
|
||||
|
||||
/*
|
||||
* Disable floating-point again
|
||||
*/
|
||||
isync();
|
||||
mtmsr(msr);
|
||||
|
||||
/*
|
||||
* Clear the current fp thread and pcb's CPU id
|
||||
* XXX should this be left clear to allow lazy save/restore ?
|
||||
*/
|
||||
pcb->pcb_fpcpu = NULL;
|
||||
PCPU_SET(fputhread, NULL);
|
||||
}
|
||||
|
||||
|
@ -137,6 +137,7 @@ ASSYM(PCB_LR, offsetof(struct pcb, pcb_lr));
|
||||
ASSYM(PCB_USR, offsetof(struct pcb, pcb_usr));
|
||||
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
|
||||
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
|
||||
ASSYM(PCB_FPU, PCB_FPU);
|
||||
|
||||
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
|
||||
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
|
||||
|
@ -88,12 +88,11 @@ ENTRY(cpu_switch)
|
||||
stw %r16,PCB_USR(%r3)
|
||||
stw %r1,PCB_SP(%r3) /* Save the stack pointer */
|
||||
|
||||
#if 0
|
||||
lwz %r29,PCB_FLAGS(%r3)
|
||||
andi. %r9, %r29, 1 /* XXX - don't hard code */
|
||||
lwz %r5,PCB_FLAGS(%r3) /* Save FPU context if needed */
|
||||
andi. %r5, %r5, PCB_FPU
|
||||
beq .L1
|
||||
mr %r3,%r15
|
||||
bl save_fpu
|
||||
#endif
|
||||
|
||||
.L1:
|
||||
bl choosethread /* Find a new thread to run */
|
||||
@ -111,16 +110,14 @@ ENTRY(cpu_switch)
|
||||
mfsprg %r4,0 /* Get the pcpu pointer */
|
||||
stw %r16,PC_CURTHREAD(%r4) /* Store new current thread */
|
||||
mr %r3,%r16 /* and save in r3 */
|
||||
lwz %r16,TD_PCB(%r16) /* Store new current PCB */
|
||||
stw %r16,PC_CURPCB(%r4)
|
||||
lwz %r17,TD_PCB(%r16) /* Store new current PCB */
|
||||
stw %r17,PC_CURPCB(%r4)
|
||||
|
||||
#if 0
|
||||
lwz %r29, PCB_FLAGS(%r4) /* Restore FPU regs if needed */
|
||||
andi. %r9, %r29, 1
|
||||
lwz %r5, PCB_FLAGS(%r17) /* Restore FPU context if needed */
|
||||
andi. %r5, %r5, PCB_FPU
|
||||
beq .L2
|
||||
mr %r3, %r4
|
||||
bl enable_fpu
|
||||
#endif
|
||||
mr %r3,%r16 /* Recover new curthread */
|
||||
|
||||
/* thread to restore is in r3 */
|
||||
.L2:
|
||||
|
@ -150,7 +150,7 @@ trapname(u_int vector)
|
||||
void
|
||||
trap(struct trapframe *frame)
|
||||
{
|
||||
struct thread *td, *fputhread;
|
||||
struct thread *td;
|
||||
struct proc *p;
|
||||
int sig, type, user;
|
||||
u_int sticks, ucode;
|
||||
@ -192,13 +192,9 @@ trap(struct trapframe *frame)
|
||||
break;
|
||||
|
||||
case EXC_FPU:
|
||||
if ((fputhread = PCPU_GET(fputhread)) != NULL) {
|
||||
save_fpu(fputhread);
|
||||
}
|
||||
PCPU_SET(fputhread, td);
|
||||
td->td_pcb->pcb_fpcpu = PCPU_GET(cpuid);
|
||||
KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
|
||||
("FPU already enabled for thread"));
|
||||
enable_fpu(td);
|
||||
frame->srr1 |= PSL_FP;
|
||||
break;
|
||||
|
||||
#ifdef ALTIVEC
|
||||
@ -253,10 +249,6 @@ trap(struct trapframe *frame)
|
||||
}
|
||||
}
|
||||
|
||||
if (td != PCPU_GET(fputhread) ||
|
||||
td->td_pcb->pcb_fpcpu != PCPU_GET(cpuid))
|
||||
frame->srr1 &= ~PSL_FP;
|
||||
|
||||
#ifdef ALTIVEC
|
||||
if (td != PCPU_GET(vecthread) ||
|
||||
td->td_pcb->pcb_veccpu != PCPU_GET(cpuid))
|
||||
|
Loading…
x
Reference in New Issue
Block a user