Reimplement the lazy FP context switching:
o   Move all code into a single file for easier maintenance.
o   Use a single global lock to avoid having to handle either multiple
    locks or race conditions.
o   Make sure to disable the high FP registers after saving or dropping
    them.
o   Use msleep() to wait for the other CPU to save the high FP registers.

This change fixes the high FP inconsistency panics.

A single global lock typically serializes too much, which may be
noticeable when a lot of threads use the high FP registers, but in that
case it's probably better to switch the high FP context synchronously.
Put differently: cpu_switch() should switch the high FP registers if the
incoming and outgoing threads both use the high FP registers.
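The synchronous hand-off suggested in the last sentence is not implemented by this commit. Purely as an illustration, here is a minimal sketch of what such an eager switch in cpu_switch() could look like. The helper ia64_highfp_switch() and the md_highfp_used hint are hypothetical; save_high_fp(), restore_high_fp(), pcb_high_fp, pcb_fpcpu, fpcurthread, pcpup and the IA64_PSR_DFH/IA64_PSR_MFH bits are the names used by the code in the diff below.

/* Same headers as the new sys/ia64/ia64/highfp.c below. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <machine/frame.h>
#include <machine/md_var.h>

/*
 * Hypothetical sketch -- not part of this commit.  Eagerly hand the
 * high FP registers from the outgoing to the incoming thread when
 * both use them, so neither thread takes a disabled-FP trap later.
 * Assumed to be called from the MD cpu_switch() path with interrupts
 * disabled on the current CPU, so no extra locking is done here.
 */
static __inline void
ia64_highfp_switch(struct thread *oldtd, struct thread *newtd)
{
	struct pcb *oldpcb = oldtd->td_pcb;
	struct pcb *newpcb = newtd->td_pcb;

	/* The outgoing thread must own the high FP state on this CPU. */
	if (oldpcb->pcb_fpcpu != pcpup)
		return;
	/*
	 * md_highfp_used is a hypothetical per-thread hint ("this thread
	 * has used the high FP registers recently"); no such flag exists
	 * in this commit.
	 */
	if (!newtd->td_md.md_highfp_used)
		return;

	/* Save and disable the outgoing thread's high FP registers. */
	save_high_fp(&oldpcb->pcb_high_fp);
	oldtd->td_frame->tf_special.psr |= IA64_PSR_DFH;
	oldpcb->pcb_fpcpu = NULL;

	/* Restore and enable them for the incoming thread. */
	restore_high_fp(&newpcb->pcb_high_fp);
	newpcb->pcb_fpcpu = pcpup;
	PCPU_SET(fpcurthread, newtd);
	newtd->td_frame->tf_special.psr &= ~(IA64_PSR_DFH | IA64_PSR_MFH);
}

In exchange for doing the save/restore unconditionally at context-switch time, such a scheme avoids the global lock and the IPI/msleep() handshake for that pair of threads.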
commit 943e1b107a (parent 113d2ed8a6)
@@ -84,6 +84,7 @@ ia64/ia64/elf_machdep.c standard
 ia64/ia64/emulate.c		standard
 ia64/ia64/exception.S		standard
 ia64/ia64/gdb_machdep.c		optional	gdb
+ia64/ia64/highfp.c		standard
 ia64/ia64/in_cksum.c		optional	inet
 ia64/ia64/interrupt.c		standard
 ia64/ia64/locore.S		standard	no-obj
sys/ia64/ia64/highfp.c (new file, 181 lines)
@@ -0,0 +1,181 @@
+/*-
+ * Copyright (c) 2009 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/smp.h>
+
+static struct mtx ia64_highfp_mtx;
+
+static void
+ia64_highfp_init(void *_)
+{
+	mtx_init(&ia64_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
+}
+SYSINIT(ia64_highfp_init, SI_SUB_LOCK, SI_ORDER_ANY, ia64_highfp_init, NULL);
+
+#ifdef SMP
+static int
+ia64_highfp_ipi(struct pcpu *cpu)
+{
+	int error;
+
+	ipi_send(cpu, IPI_HIGH_FP);
+	error = msleep_spin(&cpu->pc_fpcurthread, &ia64_highfp_mtx,
+	    "High FP", 0);
+	return (error);
+}
+#endif
+
+int
+ia64_highfp_drop(struct thread *td)
+{
+	struct pcb *pcb;
+	struct pcpu *cpu;
+
+	pcb = td->td_pcb;
+
+	mtx_lock_spin(&ia64_highfp_mtx);
+	cpu = pcb->pcb_fpcpu;
+	if (cpu != NULL) {
+		KASSERT(cpu->pc_fpcurthread == td,
+		    ("cpu->pc_fpcurthread != td"));
+		td->td_frame->tf_special.psr |= IA64_PSR_DFH;
+		pcb->pcb_fpcpu = NULL;
+		cpu->pc_fpcurthread = NULL;
+	}
+	mtx_unlock_spin(&ia64_highfp_mtx);
+
+	return ((cpu != NULL) ? 1 : 0);
+}
+
+int
+ia64_highfp_enable(struct thread *td, struct trapframe *tf)
+{
+	struct pcb *pcb;
+	struct pcpu *cpu;
+	struct thread *td1;
+
+	pcb = td->td_pcb;
+
+	mtx_lock_spin(&ia64_highfp_mtx);
+	KASSERT((tf->tf_special.psr & IA64_PSR_DFH) != 0,
+	    ("(tf->tf_special.psr & IA64_PSR_DFH) == 0"));
+	cpu = pcb->pcb_fpcpu;
+#ifdef SMP
+	if (cpu != NULL && cpu != pcpup) {
+		KASSERT(cpu->pc_fpcurthread == td,
+		    ("cpu->pc_fpcurthread != td"));
+		ia64_highfp_ipi(cpu);
+	}
+#endif
+	td1 = PCPU_GET(fpcurthread);
+	if (td1 != NULL && td1 != td) {
+		KASSERT(td1->td_pcb->pcb_fpcpu == pcpup,
+		    ("td1->td_pcb->pcb_fpcpu != pcpup"));
+		save_high_fp(&td1->td_pcb->pcb_high_fp);
+		td1->td_frame->tf_special.psr |= IA64_PSR_DFH;
+		td1->td_pcb->pcb_fpcpu = NULL;
+		PCPU_SET(fpcurthread, NULL);
+		td1 = NULL;
+	}
+	if (td1 == NULL) {
+		KASSERT(pcb->pcb_fpcpu == NULL, ("pcb->pcb_fpcpu != NULL"));
+		KASSERT(PCPU_GET(fpcurthread) == NULL,
+		    ("PCPU_GET(fpcurthread) != NULL"));
+		restore_high_fp(&pcb->pcb_high_fp);
+		PCPU_SET(fpcurthread, td);
+		pcb->pcb_fpcpu = pcpup;
+		tf->tf_special.psr &= ~IA64_PSR_MFH;
+	}
+	tf->tf_special.psr &= ~IA64_PSR_DFH;
+	mtx_unlock_spin(&ia64_highfp_mtx);
+
+	return ((td1 != NULL) ? 1 : 0);
+}
+
+int
+ia64_highfp_save(struct thread *td)
+{
+	struct pcb *pcb;
+	struct pcpu *cpu;
+
+	pcb = td->td_pcb;
+
+	mtx_lock_spin(&ia64_highfp_mtx);
+	cpu = pcb->pcb_fpcpu;
+#ifdef SMP
+	if (cpu != NULL && cpu != pcpup) {
+		KASSERT(cpu->pc_fpcurthread == td,
+		    ("cpu->pc_fpcurthread != td"));
+		ia64_highfp_ipi(cpu);
+	} else
+#endif
+	if (cpu != NULL) {
+		KASSERT(cpu->pc_fpcurthread == td,
+		    ("cpu->pc_fpcurthread != td"));
+		save_high_fp(&pcb->pcb_high_fp);
+		td->td_frame->tf_special.psr |= IA64_PSR_DFH;
+		pcb->pcb_fpcpu = NULL;
+		cpu->pc_fpcurthread = NULL;
+	}
+	mtx_unlock_spin(&ia64_highfp_mtx);
+
+	return ((cpu != NULL) ? 1 : 0);
+}
+
+#ifdef SMP
+int
+ia64_highfp_save_ipi(void)
+{
+	struct thread *td;
+
+	mtx_lock_spin(&ia64_highfp_mtx);
+	td = PCPU_GET(fpcurthread);
+	if (td != NULL) {
+		KASSERT(td->td_pcb->pcb_fpcpu == pcpup,
+		    ("td->td_pcb->pcb_fpcpu != pcpup"));
+		save_high_fp(&td->td_pcb->pcb_high_fp);
+		td->td_frame->tf_special.psr |= IA64_PSR_DFH;
+		td->td_pcb->pcb_fpcpu = NULL;
+		PCPU_SET(fpcurthread, NULL);
+	}
+	mtx_unlock_spin(&ia64_highfp_mtx);
+	wakeup(&PCPU_GET(fpcurthread));
+
+	return ((td != NULL) ? 1 : 0);
+}
+#endif
@@ -216,14 +216,7 @@ interrupt(struct trapframe *tf)
 			asts[PCPU_GET(cpuid)]++;
 			CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
 		} else if (vector == ipi_vector[IPI_HIGH_FP]) {
-			struct thread *thr = PCPU_GET(fpcurthread);
-			if (thr != NULL) {
-				mtx_lock_spin(&thr->td_md.md_highfp_mtx);
-				save_high_fp(&thr->td_pcb->pcb_high_fp);
-				thr->td_pcb->pcb_fpcpu = NULL;
-				PCPU_SET(fpcurthread, NULL);
-				mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
-			}
+			ia64_highfp_save_ipi();
 		} else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
 			rdvs[PCPU_GET(cpuid)]++;
 			CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));
@@ -1461,81 +1461,6 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
 	return (0);
 }
 
-/*
- * High FP register functions.
- */
-
-int
-ia64_highfp_drop(struct thread *td)
-{
-	struct pcb *pcb;
-	struct pcpu *cpu;
-	struct thread *thr;
-
-	mtx_lock_spin(&td->td_md.md_highfp_mtx);
-	pcb = td->td_pcb;
-	cpu = pcb->pcb_fpcpu;
-	if (cpu == NULL) {
-		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-		return (0);
-	}
-	pcb->pcb_fpcpu = NULL;
-	thr = cpu->pc_fpcurthread;
-	cpu->pc_fpcurthread = NULL;
-	mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
-	/* Post-mortem sanity checking. */
-	KASSERT(thr == td, ("Inconsistent high FP state"));
-	return (1);
-}
-
-int
-ia64_highfp_save(struct thread *td)
-{
-	struct pcb *pcb;
-	struct pcpu *cpu;
-	struct thread *thr;
-
-	/* Don't save if the high FP registers weren't modified. */
-	if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0)
-		return (ia64_highfp_drop(td));
-
-	mtx_lock_spin(&td->td_md.md_highfp_mtx);
-	pcb = td->td_pcb;
-	cpu = pcb->pcb_fpcpu;
-	if (cpu == NULL) {
-		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-		return (0);
-	}
-#ifdef SMP
-	if (td == curthread)
-		sched_pin();
-	if (cpu != pcpup) {
-		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-		ipi_send(cpu, IPI_HIGH_FP);
-		if (td == curthread)
-			sched_unpin();
-		while (pcb->pcb_fpcpu == cpu)
-			DELAY(100);
-		return (1);
-	} else {
-		save_high_fp(&pcb->pcb_high_fp);
-		if (td == curthread)
-			sched_unpin();
-	}
-#else
-	save_high_fp(&pcb->pcb_high_fp);
-#endif
-	pcb->pcb_fpcpu = NULL;
-	thr = cpu->pc_fpcurthread;
-	cpu->pc_fpcurthread = NULL;
-	mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
-	/* Post-mortem sanity cxhecking. */
-	KASSERT(thr == td, ("Inconsistent high FP state"));
-	return (1);
-}
-
 void
 ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
 {
@@ -652,66 +652,10 @@ trap(int vector, struct trapframe *tf)
 		break;
 
 	case IA64_VEC_DISABLED_FP: {
-		struct pcpu *pcpu;
-		struct pcb *pcb;
-		struct thread *thr;
-
-		/* Always fatal in kernel. Should never happen. */
-		if (!user)
+		if (user)
+			ia64_highfp_enable(td, tf);
+		else
 			trap_panic(vector, tf);
-
-		sched_pin();
-		thr = PCPU_GET(fpcurthread);
-		if (thr == td) {
-			/*
-			 * Short-circuit handling the trap when this CPU
-			 * already holds the high FP registers for this
-			 * thread. We really shouldn't get the trap in the
-			 * first place, but since it's only a performance
-			 * issue and not a correctness issue, we emit a
-			 * message for now, enable the high FP registers and
-			 * return.
-			 */
-			printf("XXX: bogusly disabled high FP regs\n");
-			tf->tf_special.psr &= ~IA64_PSR_DFH;
-			sched_unpin();
-			goto out;
-		} else if (thr != NULL) {
-			mtx_lock_spin(&thr->td_md.md_highfp_mtx);
-			pcb = thr->td_pcb;
-			save_high_fp(&pcb->pcb_high_fp);
-			pcb->pcb_fpcpu = NULL;
-			PCPU_SET(fpcurthread, NULL);
-			mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
-			thr = NULL;
-		}
-
-		mtx_lock_spin(&td->td_md.md_highfp_mtx);
-		pcb = td->td_pcb;
-		pcpu = pcb->pcb_fpcpu;
-
-#ifdef SMP
-		if (pcpu != NULL) {
-			mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-			ipi_send(pcpu, IPI_HIGH_FP);
-			while (pcb->pcb_fpcpu == pcpu)
-				DELAY(100);
-			mtx_lock_spin(&td->td_md.md_highfp_mtx);
-			pcpu = pcb->pcb_fpcpu;
-			thr = PCPU_GET(fpcurthread);
-		}
-#endif
-
-		if (thr == NULL && pcpu == NULL) {
-			restore_high_fp(&pcb->pcb_high_fp);
-			PCPU_SET(fpcurthread, td);
-			pcb->pcb_fpcpu = pcpup;
-			tf->tf_special.psr &= ~IA64_PSR_MFH;
-			tf->tf_special.psr &= ~IA64_PSR_DFH;
-		}
-
-		mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-		sched_unpin();
 		goto out;
 	}
 
@@ -120,14 +120,11 @@ cpu_thread_alloc(struct thread *td)
 	sp -= sizeof(struct trapframe);
 	td->td_frame = (struct trapframe *)sp;
 	td->td_frame->tf_length = sizeof(struct trapframe);
-	mtx_init(&td->td_md.md_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
 }
 
 void
 cpu_thread_free(struct thread *td)
 {
-
-	mtx_destroy(&td->td_md.md_highfp_mtx);
 }
 
 void
@@ -148,6 +145,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
 	struct pcb *pcb;
 	struct trapframe *tf;
 
+	ia64_highfp_save(td0);
+
 	tf = td->td_frame;
 	KASSERT(tf != NULL, ("foo"));
 	bcopy(td0->td_frame, tf, sizeof(*tf));
@@ -86,7 +86,9 @@ int ia64_emulate(struct trapframe *, struct thread *);
 int ia64_flush_dirty(struct thread *, struct _special *);
 uint64_t ia64_get_hcdp(void);
 int ia64_highfp_drop(struct thread *);
+int ia64_highfp_enable(struct thread *, struct trapframe *);
 int ia64_highfp_save(struct thread *);
+int ia64_highfp_save_ipi(void);
 struct ia64_init_return ia64_init(void);
 void ia64_probe_sapics(void);
 void ia64_sync_icache(vm_offset_t, vm_size_t);
@@ -30,7 +30,6 @@
 #define	_MACHINE_PROC_H_
 
 struct mdthread {
-	struct mtx	md_highfp_mtx;
 	int		md_spinlock_count;	/* (k) */
 	int		md_saved_intr;		/* (k) */
 };