Detect interrupt storms better. The storm detection didn't work at all
with an ASUS A7N8X-E motherboard in APIC mode, since storming interrupts don't repeat immediately. Use DELAY(1) to wait a bit for them to repeat. This affects all systems. Only delay for the first (10 * intr_storm_threshold) interrupts (per interrupt handler) so that this is only a pessimization while warming up. Throttle after calling the sub-handlers instead of before so that the long delay given by throttling can be used instead of the DELAY(1) to detect storms after warming up. Reduced the throttling period from 1/10 second to 1/hz seconds so that throttling doesn't destroy performance so much. Interrupts that are detected as storming are effectively handled by polling at a frequency of hz Hz. On A7N8X-E's there is another hardware or configuration bug that makes the throttled frequency closer to 2*hz Hz.
This commit is contained in:
parent
b612d5e1ea
commit
05b2c96fd3
@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/kthread.h>
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/limits.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/mutex.h>
|
||||
@ -494,14 +495,14 @@ ithread_loop(void *arg)
|
||||
struct intrhand *ih; /* and our interrupt handler chain */
|
||||
struct thread *td;
|
||||
struct proc *p;
|
||||
int count, warned;
|
||||
int count, warming, warned;
|
||||
|
||||
td = curthread;
|
||||
p = td->td_proc;
|
||||
ithd = (struct ithd *)arg; /* point to myself */
|
||||
KASSERT(ithd->it_td == td && td->td_ithd == ithd,
|
||||
("%s: ithread and proc linkage out of sync", __func__));
|
||||
count = 0;
|
||||
warming = 10 * intr_storm_threshold;
|
||||
warned = 0;
|
||||
|
||||
/*
|
||||
@ -523,6 +524,7 @@ ithread_loop(void *arg)
|
||||
|
||||
CTR4(KTR_INTR, "%s: pid %d: (%s) need=%d", __func__,
|
||||
p->p_pid, p->p_comm, ithd->it_need);
|
||||
count = 0;
|
||||
while (ithd->it_need) {
|
||||
/*
|
||||
* Service interrupts. If another interrupt
|
||||
@ -531,25 +533,6 @@ ithread_loop(void *arg)
|
||||
* another pass.
|
||||
*/
|
||||
atomic_store_rel_int(&ithd->it_need, 0);
|
||||
|
||||
/*
|
||||
* If we detect an interrupt storm, pause with
|
||||
* the source masked for 1/10th of a second.
|
||||
*/
|
||||
if (intr_storm_threshold != 0 && count >=
|
||||
intr_storm_threshold) {
|
||||
if (!warned) {
|
||||
printf(
|
||||
"Interrupt storm detected on \"%s\"; throttling interrupt source\n",
|
||||
p->p_comm);
|
||||
warned = 1;
|
||||
}
|
||||
tsleep(&count, td->td_priority, "istorm",
|
||||
hz / 10);
|
||||
count = 0;
|
||||
} else
|
||||
count++;
|
||||
|
||||
restart:
|
||||
TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) {
|
||||
if (ithd->it_flags & IT_SOFT && !ih->ih_need)
|
||||
@ -575,8 +558,53 @@ ithread_loop(void *arg)
|
||||
if ((ih->ih_flags & IH_MPSAFE) == 0)
|
||||
mtx_unlock(&Giant);
|
||||
}
|
||||
if (ithd->it_enable != NULL)
|
||||
if (ithd->it_enable != NULL) {
|
||||
ithd->it_enable(ithd->it_vector);
|
||||
|
||||
/*
|
||||
* Storm detection needs a delay here
|
||||
* to see slightly delayed interrupts
|
||||
* on some machines, but we don't
|
||||
* want to always delay, so only delay
|
||||
* while warming up.
|
||||
*/
|
||||
if (warming != 0) {
|
||||
DELAY(1);
|
||||
--warming;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we detect an interrupt storm, sleep until
|
||||
* the next hardclock tick. We sleep at the
|
||||
* end of the loop instead of at the beginning
|
||||
* to ensure that we see slightly delayed
|
||||
* interrupts.
|
||||
*/
|
||||
if (count >= intr_storm_threshold) {
|
||||
if (!warned) {
|
||||
printf(
|
||||
"Interrupt storm detected on \"%s\"; throttling interrupt source\n",
|
||||
p->p_comm);
|
||||
warned = 1;
|
||||
}
|
||||
tsleep(&count, td->td_priority, "istorm", 1);
|
||||
|
||||
/*
|
||||
* Fudge the count to re-throttle if the
|
||||
* interrupt is still active. Our storm
|
||||
* detection is too primitive to detect
|
||||
* whether the storm has gone away
|
||||
* reliably, even if we were to waste a
|
||||
* lot of time spinning for the next
|
||||
* intr_storm_threshold interrupts, so
|
||||
* we assume that the storm hasn't gone
|
||||
* away unless the interrupt repeats
|
||||
* less often than the hardclock interrupt.
|
||||
*/
|
||||
count = INT_MAX - 1;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
|
||||
mtx_assert(&Giant, MA_NOTOWNED);
|
||||
@ -589,7 +617,6 @@ ithread_loop(void *arg)
|
||||
mtx_lock_spin(&sched_lock);
|
||||
if (!ithd->it_need) {
|
||||
TD_SET_IWAIT(td);
|
||||
count = 0;
|
||||
CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid);
|
||||
mi_switch(SW_VOL);
|
||||
CTR2(KTR_INTR, "%s: pid %d: resumed", __func__, p->p_pid);
|
||||
|
Loading…
Reference in New Issue
Block a user