Attempt to further fix some of the concurrency/reset issues that occur.

* ath_reset() is being called in softclock context, which may have the
  thing sleep on a lock.  To avoid this, since we really _shouldn't_
  be sleeping on any locks, break out the no-loss reset path into a tasklet
  and call that from:

  + ath_calibrate()
  + ath_watchdog()

  This has the added advantage that it'll end up also doing the frame
  RX cleanup from within the taskqueue context, rather than the softclock
  context.

* Shuffle around the taskqueue_block() call to be before we grab the lock
  and disable interrupts.

  The trouble here is that taskqueue_block() doesn't block currently
  queued (but not yet running) tasks so calling it doesn't guarantee
  no further tasks (that weren't running on _A_ CPU at the time of this
  call) will complete.  Calling taskqueue_drain() on these tasks won't
  work because if any _other_ thread calls taskqueue_enqueue() for whatever
  reason, everything gets very angry and stops working.

  This slightly changes the race condition enough to let ath_rx_tasklet()
  run before we try disabling it, and thus quietens the warnings a bit.

  The (more) true solution will be doing something like the following:

  * having a taskqueue_blocked mask in ath_softc;
  * having an interrupt_blocked mask in ath_softc;
  * only calling taskqueue_drain() on each individual task _after_ the
    lock has been acquired - that way no further tasklet scheduling
    is going to occur.
  * Then once the tasks have been blocked _and_ the interrupt has been
    disabled, call taskqueue_drain() on each, ensuring that anything
    that _was_ scheduled or running is removed.

  The trouble is if something calls taskqueue_enqueue() on a task
  after taskqueue_blocked() has been called but BEFORE taskqueue_drain()
  has been called, ta_pending will be set to 1 and taskqueue_drain()
  will sit there stuck in msleep() until you hard-kill the machine.

PR: kern/165382
PR: kern/165220
This commit is contained in:
Adrian Chadd 2012-02-25 19:12:54 +00:00
parent 79e538388f
commit d52f713265
2 changed files with 35 additions and 16 deletions

View File

@ -159,6 +159,7 @@ static void ath_beacon_proc(void *, int);
static struct ath_buf *ath_beacon_generate(struct ath_softc *,
struct ieee80211vap *);
static void ath_bstuck_proc(void *, int);
static void ath_reset_proc(void *, int);
static void ath_beacon_return(struct ath_softc *, struct ath_buf *);
static void ath_beacon_free(struct ath_softc *);
static void ath_beacon_config(struct ath_softc *, struct ieee80211vap *);
@ -392,6 +393,7 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
TASK_INIT(&sc->sc_rxtask, 0, ath_rx_tasklet, sc);
TASK_INIT(&sc->sc_bmisstask, 0, ath_bmiss_proc, sc);
TASK_INIT(&sc->sc_bstucktask,0, ath_bstuck_proc, sc);
TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc);
/*
* Allocate hardware transmit queues: one queue for
@ -1910,9 +1912,6 @@ ath_txrx_stop_locked(struct ath_softc *sc)
ATH_UNLOCK_ASSERT(sc);
ATH_PCU_LOCK_ASSERT(sc);
/* Stop any new TX/RX from occuring */
taskqueue_block(sc->sc_tq);
/*
* Sleep until all the pending operations have completed.
*
@ -2050,6 +2049,9 @@ ath_reset(struct ifnet *ifp, ATH_RESET_TYPE reset_type)
ATH_PCU_UNLOCK_ASSERT(sc);
ATH_UNLOCK_ASSERT(sc);
/* Try to (stop any further TX/RX from occuring */
taskqueue_block(sc->sc_tq);
ATH_PCU_LOCK(sc);
ath_hal_intrset(ah, 0); /* disable interrupts */
ath_txrx_stop_locked(sc); /* Ensure TX/RX is stopped */
@ -3162,6 +3164,23 @@ ath_beacon_start_adhoc(struct ath_softc *sc, struct ieee80211vap *vap)
ath_hal_txstart(ah, sc->sc_bhalq);
}
/*
* Reset the hardware, with no loss.
*
* This can't be used for a general case reset.
*/
static void
ath_reset_proc(void *arg, int pending)
{
struct ath_softc *sc = arg;
struct ifnet *ifp = sc->sc_ifp;
#if 0
if_printf(ifp, "%s: resetting\n", __func__);
#endif
ath_reset(ifp, ATH_RESET_NOLOSS);
}
/*
* Reset the hardware after detecting beacons have stopped.
*/
@ -5387,6 +5406,12 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
int ret = 0;
/* Treat this as an interface reset */
ATH_PCU_UNLOCK_ASSERT(sc);
ATH_UNLOCK_ASSERT(sc);
/* (Try to) stop TX/RX from occuring */
taskqueue_block(sc->sc_tq);
ATH_PCU_LOCK(sc);
ath_hal_intrset(ah, 0); /* Stop new RX/TX completion */
ath_txrx_stop_locked(sc); /* Stop pending RX/TX completion */
@ -5526,18 +5551,10 @@ ath_calibrate(void *arg)
DPRINTF(sc, ATH_DEBUG_CALIBRATE,
"%s: rfgain change\n", __func__);
sc->sc_stats.ast_per_rfgain++;
/*
* Drop lock - we can't hold it across the
* ath_reset() call. Instead, we'll drop
* out here, do a reset, then reschedule
* the callout.
*/
callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
sc->sc_resetcal = 0;
sc->sc_doresetcal = AH_TRUE;
ATH_UNLOCK(sc);
ath_reset(ifp, ATH_RESET_NOLOSS);
ATH_LOCK(sc);
taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask);
callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
return;
}
/*
@ -6187,11 +6204,12 @@ ath_watchdog(void *arg)
/*
* We can't hold the lock across the ath_reset() call.
*
* And since this routine can't hold a lock and sleep,
* do the reset deferred.
*/
if (do_reset) {
ATH_UNLOCK(sc);
ath_reset(sc->sc_ifp, ATH_RESET_NOLOSS);
ATH_LOCK(sc);
taskqueue_enqueue(sc->sc_tq, &sc->sc_resettask);
}
callout_schedule(&sc->sc_wd_ch, hz);

View File

@ -501,6 +501,7 @@ struct ath_softc {
struct ath_txq *sc_cabq; /* tx q for cab frames */
struct task sc_bmisstask; /* bmiss int processing */
struct task sc_bstucktask; /* stuck beacon processing */
struct task sc_resettask; /* interface reset task */
enum {
OK, /* no change needed */
UPDATE, /* update pending */