Reimplement the lazy FP context switching:

o   Move all code into a single file for easier maintenance.
o   Use a single global lock to avoid having to handle either
    multiple locks or race conditions.
o   Make sure to disable the high FP registers after saving
    or dropping them.
o   Use msleep() to wait for the other CPU to save the high
    FP registers.

This change fixes the high FP inconsistency panics.
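
In outline, the msleep()-based handshake from the list above works as sketched
below. This is a condensed excerpt of the code added in sys/ia64/ia64/highfp.c
further down (the KASSERTs and locking boilerplate are omitted):

    /* Requesting CPU: runs with the global high FP spin lock held. */
    ipi_send(cpu, IPI_HIGH_FP);                 /* ask the owning CPU to save */
    msleep_spin(&cpu->pc_fpcurthread,           /* sleep until it has done so */
        &ia64_highfp_mtx, "High FP", 0);

    /* Owning CPU, in ia64_highfp_save_ipi(): save, disable, wake the waiter. */
    save_high_fp(&td->td_pcb->pcb_high_fp);
    td->td_frame->tf_special.psr |= IA64_PSR_DFH;
    td->td_pcb->pcb_fpcpu = NULL;
    PCPU_SET(fpcurthread, NULL);
    wakeup(&PCPU_GET(fpcurthread));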

A single global lock typically serializes too much, which may
be noticeable when a lot of threads use the high FP registers,
but in that case it's probably better to switch the high FP
context synchronously. Put differently: cpu_switch() should
switch the high FP registers if the incoming and outgoing
threads both use the high FP registers.
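
For reference, a purely illustrative sketch of that idea follows; it is not
part of this commit. The helper thread_uses_highfp() is made up, oldtd/newtd
stand for the outgoing and incoming threads, and only save_high_fp(),
restore_high_fp() and pcb_high_fp are existing names:

    /*
     * Hypothetical eager variant: when both the outgoing and the incoming
     * thread use the high FP registers, hand them over during the context
     * switch itself and skip the disabled-FP trap and the global lock.
     */
    if (thread_uses_highfp(oldtd) && thread_uses_highfp(newtd)) {
        save_high_fp(&oldtd->td_pcb->pcb_high_fp);
        restore_high_fp(&newtd->td_pcb->pcb_high_fp);
    }
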
Marcel Moolenaar 2009-10-31 22:27:31 +00:00
parent 156ef7611e
commit 8d077f48f0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=198733
8 changed files with 190 additions and 146 deletions


@@ -84,6 +84,7 @@ ia64/ia64/elf_machdep.c standard
ia64/ia64/emulate.c standard
ia64/ia64/exception.S standard
ia64/ia64/gdb_machdep.c optional gdb
+ia64/ia64/highfp.c standard
ia64/ia64/in_cksum.c optional inet
ia64/ia64/interrupt.c standard
ia64/ia64/locore.S standard no-obj

sys/ia64/ia64/highfp.c (new file, 181 lines)

@@ -0,0 +1,181 @@
/*-
* Copyright (c) 2009 Marcel Moolenaar
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>

#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/smp.h>

static struct mtx ia64_highfp_mtx;

static void
ia64_highfp_init(void *_)
{

    mtx_init(&ia64_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
}
SYSINIT(ia64_highfp_init, SI_SUB_LOCK, SI_ORDER_ANY, ia64_highfp_init, NULL);

#ifdef SMP
static int
ia64_highfp_ipi(struct pcpu *cpu)
{
    int error;

    ipi_send(cpu, IPI_HIGH_FP);
    error = msleep_spin(&cpu->pc_fpcurthread, &ia64_highfp_mtx,
        "High FP", 0);
    return (error);
}
#endif

int
ia64_highfp_drop(struct thread *td)
{
    struct pcb *pcb;
    struct pcpu *cpu;

    pcb = td->td_pcb;

    mtx_lock_spin(&ia64_highfp_mtx);
    cpu = pcb->pcb_fpcpu;
    if (cpu != NULL) {
        KASSERT(cpu->pc_fpcurthread == td,
            ("cpu->pc_fpcurthread != td"));
        td->td_frame->tf_special.psr |= IA64_PSR_DFH;
        pcb->pcb_fpcpu = NULL;
        cpu->pc_fpcurthread = NULL;
    }
    mtx_unlock_spin(&ia64_highfp_mtx);

    return ((cpu != NULL) ? 1 : 0);
}

int
ia64_highfp_enable(struct thread *td, struct trapframe *tf)
{
    struct pcb *pcb;
    struct pcpu *cpu;
    struct thread *td1;

    pcb = td->td_pcb;

    mtx_lock_spin(&ia64_highfp_mtx);
    KASSERT((tf->tf_special.psr & IA64_PSR_DFH) != 0,
        ("(tf->tf_special.psr & IA64_PSR_DFH) == 0"));
    cpu = pcb->pcb_fpcpu;
#ifdef SMP
    if (cpu != NULL && cpu != pcpup) {
        KASSERT(cpu->pc_fpcurthread == td,
            ("cpu->pc_fpcurthread != td"));
        ia64_highfp_ipi(cpu);
    }
#endif
    td1 = PCPU_GET(fpcurthread);
    if (td1 != NULL && td1 != td) {
        KASSERT(td1->td_pcb->pcb_fpcpu == pcpup,
            ("td1->td_pcb->pcb_fpcpu != pcpup"));
        save_high_fp(&td1->td_pcb->pcb_high_fp);
        td1->td_frame->tf_special.psr |= IA64_PSR_DFH;
        td1->td_pcb->pcb_fpcpu = NULL;
        PCPU_SET(fpcurthread, NULL);
        td1 = NULL;
    }
    if (td1 == NULL) {
        KASSERT(pcb->pcb_fpcpu == NULL, ("pcb->pcb_fpcpu != NULL"));
        KASSERT(PCPU_GET(fpcurthread) == NULL,
            ("PCPU_GET(fpcurthread) != NULL"));
        restore_high_fp(&pcb->pcb_high_fp);
        PCPU_SET(fpcurthread, td);
        pcb->pcb_fpcpu = pcpup;
        tf->tf_special.psr &= ~IA64_PSR_MFH;
    }
    tf->tf_special.psr &= ~IA64_PSR_DFH;
    mtx_unlock_spin(&ia64_highfp_mtx);

    return ((td1 != NULL) ? 1 : 0);
}

int
ia64_highfp_save(struct thread *td)
{
    struct pcb *pcb;
    struct pcpu *cpu;

    pcb = td->td_pcb;

    mtx_lock_spin(&ia64_highfp_mtx);
    cpu = pcb->pcb_fpcpu;
#ifdef SMP
    if (cpu != NULL && cpu != pcpup) {
        KASSERT(cpu->pc_fpcurthread == td,
            ("cpu->pc_fpcurthread != td"));
        ia64_highfp_ipi(cpu);
    } else
#endif
    if (cpu != NULL) {
        KASSERT(cpu->pc_fpcurthread == td,
            ("cpu->pc_fpcurthread != td"));
        save_high_fp(&pcb->pcb_high_fp);
        td->td_frame->tf_special.psr |= IA64_PSR_DFH;
        pcb->pcb_fpcpu = NULL;
        cpu->pc_fpcurthread = NULL;
    }
    mtx_unlock_spin(&ia64_highfp_mtx);

    return ((cpu != NULL) ? 1 : 0);
}

#ifdef SMP
int
ia64_highfp_save_ipi(void)
{
    struct thread *td;

    mtx_lock_spin(&ia64_highfp_mtx);
    td = PCPU_GET(fpcurthread);
    if (td != NULL) {
        KASSERT(td->td_pcb->pcb_fpcpu == pcpup,
            ("td->td_pcb->pcb_fpcpu != pcpup"));
        save_high_fp(&td->td_pcb->pcb_high_fp);
        td->td_frame->tf_special.psr |= IA64_PSR_DFH;
        td->td_pcb->pcb_fpcpu = NULL;
        PCPU_SET(fpcurthread, NULL);
    }
    mtx_unlock_spin(&ia64_highfp_mtx);
    wakeup(&PCPU_GET(fpcurthread));

    return ((td != NULL) ? 1 : 0);
}
#endif


@@ -216,14 +216,7 @@ interrupt(struct trapframe *tf)
        asts[PCPU_GET(cpuid)]++;
        CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
    } else if (vector == ipi_vector[IPI_HIGH_FP]) {
-       struct thread *thr = PCPU_GET(fpcurthread);
-       if (thr != NULL) {
-           mtx_lock_spin(&thr->td_md.md_highfp_mtx);
-           save_high_fp(&thr->td_pcb->pcb_high_fp);
-           thr->td_pcb->pcb_fpcpu = NULL;
-           PCPU_SET(fpcurthread, NULL);
-           mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
-       }
+       ia64_highfp_save_ipi();
    } else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
        rdvs[PCPU_GET(cpuid)]++;
        CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));


@@ -1461,81 +1461,6 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
    return (0);
}

-/*
- * High FP register functions.
- */
-
-int
-ia64_highfp_drop(struct thread *td)
-{
-   struct pcb *pcb;
-   struct pcpu *cpu;
-   struct thread *thr;
-
-   mtx_lock_spin(&td->td_md.md_highfp_mtx);
-   pcb = td->td_pcb;
-   cpu = pcb->pcb_fpcpu;
-   if (cpu == NULL) {
-       mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-       return (0);
-   }
-   pcb->pcb_fpcpu = NULL;
-   thr = cpu->pc_fpcurthread;
-   cpu->pc_fpcurthread = NULL;
-   mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
-   /* Post-mortem sanity checking. */
-   KASSERT(thr == td, ("Inconsistent high FP state"));
-   return (1);
-}
-
-int
-ia64_highfp_save(struct thread *td)
-{
-   struct pcb *pcb;
-   struct pcpu *cpu;
-   struct thread *thr;
-
-   /* Don't save if the high FP registers weren't modified. */
-   if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0)
-       return (ia64_highfp_drop(td));
-
-   mtx_lock_spin(&td->td_md.md_highfp_mtx);
-   pcb = td->td_pcb;
-   cpu = pcb->pcb_fpcpu;
-   if (cpu == NULL) {
-       mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-       return (0);
-   }
-#ifdef SMP
-   if (td == curthread)
-       sched_pin();
-   if (cpu != pcpup) {
-       mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-       ipi_send(cpu, IPI_HIGH_FP);
-       if (td == curthread)
-           sched_unpin();
-       while (pcb->pcb_fpcpu == cpu)
-           DELAY(100);
-       return (1);
-   } else {
-       save_high_fp(&pcb->pcb_high_fp);
-       if (td == curthread)
-           sched_unpin();
-   }
-#else
-   save_high_fp(&pcb->pcb_high_fp);
-#endif
-   pcb->pcb_fpcpu = NULL;
-   thr = cpu->pc_fpcurthread;
-   cpu->pc_fpcurthread = NULL;
-   mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
-   /* Post-mortem sanity cxhecking. */
-   KASSERT(thr == td, ("Inconsistent high FP state"));
-   return (1);
-}
-
void
ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
{


@@ -652,66 +652,10 @@ trap(int vector, struct trapframe *tf)
        break;

    case IA64_VEC_DISABLED_FP: {
-       struct pcpu *pcpu;
-       struct pcb *pcb;
-       struct thread *thr;
-
-       /* Always fatal in kernel. Should never happen. */
-       if (!user)
+       if (user)
+           ia64_highfp_enable(td, tf);
+       else
            trap_panic(vector, tf);
-
-       sched_pin();
-       thr = PCPU_GET(fpcurthread);
-       if (thr == td) {
-           /*
-            * Short-circuit handling the trap when this CPU
-            * already holds the high FP registers for this
-            * thread. We really shouldn't get the trap in the
-            * first place, but since it's only a performance
-            * issue and not a correctness issue, we emit a
-            * message for now, enable the high FP registers and
-            * return.
-            */
-           printf("XXX: bogusly disabled high FP regs\n");
-           tf->tf_special.psr &= ~IA64_PSR_DFH;
-           sched_unpin();
-           goto out;
-       } else if (thr != NULL) {
-           mtx_lock_spin(&thr->td_md.md_highfp_mtx);
-           pcb = thr->td_pcb;
-           save_high_fp(&pcb->pcb_high_fp);
-           pcb->pcb_fpcpu = NULL;
-           PCPU_SET(fpcurthread, NULL);
-           mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
-           thr = NULL;
-       }
-
-       mtx_lock_spin(&td->td_md.md_highfp_mtx);
-       pcb = td->td_pcb;
-       pcpu = pcb->pcb_fpcpu;
-
-#ifdef SMP
-       if (pcpu != NULL) {
-           mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-           ipi_send(pcpu, IPI_HIGH_FP);
-           while (pcb->pcb_fpcpu == pcpu)
-               DELAY(100);
-           mtx_lock_spin(&td->td_md.md_highfp_mtx);
-           pcpu = pcb->pcb_fpcpu;
-           thr = PCPU_GET(fpcurthread);
-       }
-#endif
-
-       if (thr == NULL && pcpu == NULL) {
-           restore_high_fp(&pcb->pcb_high_fp);
-           PCPU_SET(fpcurthread, td);
-           pcb->pcb_fpcpu = pcpup;
-           tf->tf_special.psr &= ~IA64_PSR_MFH;
-           tf->tf_special.psr &= ~IA64_PSR_DFH;
-       }
-
-       mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-       sched_unpin();
        goto out;
    }


@@ -120,14 +120,11 @@ cpu_thread_alloc(struct thread *td)
    sp -= sizeof(struct trapframe);
    td->td_frame = (struct trapframe *)sp;
    td->td_frame->tf_length = sizeof(struct trapframe);
-   mtx_init(&td->td_md.md_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
}

void
cpu_thread_free(struct thread *td)
{
-
-   mtx_destroy(&td->td_md.md_highfp_mtx);
}

void
@@ -148,6 +145,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
    struct pcb *pcb;
    struct trapframe *tf;

+   ia64_highfp_save(td0);
+
    tf = td->td_frame;
    KASSERT(tf != NULL, ("foo"));
    bcopy(td0->td_frame, tf, sizeof(*tf));


@@ -86,7 +86,9 @@ int ia64_emulate(struct trapframe *, struct thread *);
int ia64_flush_dirty(struct thread *, struct _special *);
uint64_t ia64_get_hcdp(void);
int ia64_highfp_drop(struct thread *);
+int ia64_highfp_enable(struct thread *, struct trapframe *);
int ia64_highfp_save(struct thread *);
+int ia64_highfp_save_ipi(void);
struct ia64_init_return ia64_init(void);
void ia64_probe_sapics(void);
void ia64_sync_icache(vm_offset_t, vm_size_t);


@@ -30,7 +30,6 @@
#define _MACHINE_PROC_H_

struct mdthread {
-   struct mtx md_highfp_mtx;
    int md_spinlock_count;  /* (k) */
    int md_saved_intr;      /* (k) */
};