From 05b2c96fd3f63ea78047fb3fe1ae62bef0172fa6 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Sat, 5 Jun 2004 18:27:28 +0000 Subject: [PATCH] Detect interrupt storms better. The storm detection didn't work at all with an ASUS A7N8X-E motherboard in APIC mode, since storming interrupts don't repeat immediately. Use DELAY(1) to wait a bit for them to repeat. This affects all systems. Only delay for the first (10 * intr_storm_threshold) interrupts (per interrupt handler) so that this is only a pessimization while warming up. Throttle after calling the sub-handlers instead of before so that the long delay given by throttling can be used instead of the DELAY(1) to detect storms after warming up. Reduced the throttling period from 1/10 second to 1/hz seconds so that throttling doesn't destroy performance so much. Interrupts that are detected as storming are effectively handled by polling at a frequency of hz Hz. On A7N8X-E's there is another hardware or configuration bug that makes the throttled frequency closer to 2*hz Hz. 
--- sys/kern/kern_intr.c | 73 ++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index 354fba3964a7..920b9da152aa 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -494,14 +495,14 @@ ithread_loop(void *arg) struct intrhand *ih; /* and our interrupt handler chain */ struct thread *td; struct proc *p; - int count, warned; + int count, warming, warned; td = curthread; p = td->td_proc; ithd = (struct ithd *)arg; /* point to myself */ KASSERT(ithd->it_td == td && td->td_ithd == ithd, ("%s: ithread and proc linkage out of sync", __func__)); - count = 0; + warming = 10 * intr_storm_threshold; warned = 0; /* @@ -523,6 +524,7 @@ ithread_loop(void *arg) CTR4(KTR_INTR, "%s: pid %d: (%s) need=%d", __func__, p->p_pid, p->p_comm, ithd->it_need); + count = 0; while (ithd->it_need) { /* * Service interrupts. If another interrupt @@ -531,25 +533,6 @@ ithread_loop(void *arg) * another pass. */ atomic_store_rel_int(&ithd->it_need, 0); - - /* - * If we detect an interrupt storm, pause with - * the source masked for 1/10th of a second. 
- */ - if (intr_storm_threshold != 0 && count >= - intr_storm_threshold) { - if (!warned) { - printf( - "Interrupt storm detected on \"%s\"; throttling interrupt source\n", - p->p_comm); - warned = 1; - } - tsleep(&count, td->td_priority, "istorm", - hz / 10); - count = 0; - } else - count++; - restart: TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) { if (ithd->it_flags & IT_SOFT && !ih->ih_need) @@ -575,8 +558,53 @@ ithread_loop(void *arg) if ((ih->ih_flags & IH_MPSAFE) == 0) mtx_unlock(&Giant); } - if (ithd->it_enable != NULL) + if (ithd->it_enable != NULL) { ithd->it_enable(ithd->it_vector); + + /* + * Storm detection needs a delay here + * to see slightly delayed interrupts + * on some machines, but we don't + * want to always delay, so only delay + * while warming up. + */ + if (warming != 0) { + DELAY(1); + --warming; + } + } + + /* + * If we detect an interrupt storm, sleep until + * the next hardclock tick. We sleep at the + * end of the loop instead of at the beginning + * to ensure that we see slightly delayed + * interrupts. + */ + if (count >= intr_storm_threshold) { + if (!warned) { + printf( + "Interrupt storm detected on \"%s\"; throttling interrupt source\n", + p->p_comm); + warned = 1; + } + tsleep(&count, td->td_priority, "istorm", 1); + + /* + * Fudge the count to re-throttle if the + * interrupt is still active. Our storm + * detection is too primitive to detect + * whether the storm has gone away + * reliably, even if we were to waste a + * lot of time spinning for the next + * intr_storm_threshold interrupts, so + * we assume that the storm hasn't gone + * away unless the interrupt repeats + * less often than the hardclock interrupt. 
+ */ + count = INT_MAX - 1; + } + count++; } WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); mtx_assert(&Giant, MA_NOTOWNED); @@ -589,7 +617,6 @@ ithread_loop(void *arg) mtx_lock_spin(&sched_lock); if (!ithd->it_need) { TD_SET_IWAIT(td); - count = 0; CTR2(KTR_INTR, "%s: pid %d: done", __func__, p->p_pid); mi_switch(SW_VOL); CTR2(KTR_INTR, "%s: pid %d: resumed", __func__, p->p_pid);