ioat(4): Block asynchronous work during HW reset
Fix the race between ioat_reset_hw and ioat_process_events.  HW reset isn't
protected by a lock because it can sleep for a long time (40.1 ms).  This
resulted in a race where we would process bogus parts of the descriptor ring
as if they had completed.  This looked like duplicate completions of old
events, if your ring had looped at least once.

Block callout and interrupt work while reset runs so the completion end of
things does not observe indeterminate state and process invalid parts of the
ring.  Start the channel with a manually implemented ioat_null() to keep
other submitters quiesced while we wait for the channel to start (100 us).

r295605 may have made the race between ioat_reset_hw and ioat_process_events
wider, but I believe it already existed before that revision.
ioat_process_events can be invoked by two asynchronous sources: callout
(softclock) and device interrupt.  Those could race each other, to the same
effect.

Reviewed by:    markj
Approved by:    re
Sponsored by:   EMC / Isilon Storage Division
Differential Revision:  https://reviews.freebsd.org/D7097
Parent: 93f7f84af6
Commit: fe8712f84a
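As the commit message notes, ioat_process_events is reached from two
asynchronous sources, so either one can observe the ring mid-reset unless
both are fenced off.  A minimal sketch of that shape, with simplified handler
bodies (the handler names are assumed from the driver, not quoted from this
diff, and the real handlers do additional housekeeping):

/*
 * Sketch (names assumed, bodies simplified): both asynchronous sources
 * funnel into ioat_process_events(), so reset must fence out the softclock
 * callout and the device interrupt, not just one of them.
 */
static void
ioat_interrupt_handler(void *arg)
{
        struct ioat_softc *ioat = arg;

        /* Device interrupt path. */
        ioat_process_events(ioat);
}

static void
ioat_timer_callback(void *arg)
{
        struct ioat_softc *ioat = arg;

        /* Callout (softclock) path; ring-resize housekeeping elided. */
        ioat_process_events(ioat);
}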
@@ -376,12 +376,32 @@ ioat_teardown_intr(struct ioat_softc *ioat)
 static int
 ioat_start_channel(struct ioat_softc *ioat)
 {
+        struct ioat_dma_hw_descriptor *hw_desc;
+        struct ioat_descriptor *desc;
+        struct bus_dmadesc *dmadesc;
         uint64_t status;
         uint32_t chanerr;
         int i;
 
         ioat_acquire(&ioat->dmaengine);
-        ioat_null(&ioat->dmaengine, NULL, NULL, 0);
+
+        /* Submit 'NULL' operation manually to avoid quiescing flag */
+        desc = ioat_get_ring_entry(ioat, ioat->head);
+        dmadesc = &desc->bus_dmadesc;
+        hw_desc = desc->u.dma;
+
+        dmadesc->callback_fn = NULL;
+        dmadesc->callback_arg = NULL;
+
+        hw_desc->u.control_raw = 0;
+        hw_desc->u.control_generic.op = IOAT_OP_COPY;
+        hw_desc->u.control_generic.completion_update = 1;
+        hw_desc->size = 8;
+        hw_desc->src_addr = 0;
+        hw_desc->dest_addr = 0;
+        hw_desc->u.control.null = 1;
+
+        ioat_submit_single(ioat);
         ioat_release(&ioat->dmaengine);
 
         for (i = 0; i < 100; i++) {
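The hunk above is cut off at the polling loop.  For context, the wait is on
the order of the 100 us mentioned in the commit message; the following is a
sketch of how such a poll-for-active loop can look, using helpers that appear
elsewhere in the driver (the exact delay, status checks, and error reporting
in the committed code may differ):

        /* Sketch: poll briefly for the null descriptor to start the channel. */
        for (i = 0; i < 100; i++) {
                DELAY(1);
                status = ioat_get_chansts(ioat);
                if (is_ioat_active(status))
                        return (0);
        }

        /* Channel did not start; report the channel error register. */
        chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
        ioat_log_message(0, "could not start channel: "
            "status = %#jx error = %#x\n", (uintmax_t)status, chanerr);
        return (ENXIO);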
@@ -496,6 +516,7 @@ ioat3_attach(device_t device)
         ioat->head = ioat->hw_head = 0;
         ioat->tail = 0;
         ioat->last_seen = 0;
+        *ioat->comp_update = 0;
         return (0);
 }
 
@@ -641,14 +662,24 @@ ioat_process_events(struct ioat_softc *ioat)
         boolean_t pending;
         int error;
 
+        CTR0(KTR_IOAT, __func__);
+
         mtx_lock(&ioat->cleanup_lock);
 
+        /*
+         * Don't run while the hardware is being reset.  Reset is responsible
+         * for blocking new work and draining & completing existing work, so
+         * there is nothing to do until new work is queued after reset anyway.
+         */
+        if (ioat->resetting_cleanup) {
+                mtx_unlock(&ioat->cleanup_lock);
+                return;
+        }
+
         completed = 0;
         comp_update = *ioat->comp_update;
         status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
 
-        CTR0(KTR_IOAT, __func__);
-
         if (status == ioat->last_seen) {
                 /*
                  * If we landed in process_events and nothing has been
@@ -1643,6 +1674,13 @@ ioat_shrink_timer_callback(void *arg)
 
         /* Slowly scale the ring down if idle. */
         mtx_lock(&ioat->submit_lock);
+
+        /* Don't run while the hardware is being reset. */
+        if (ioat->resetting) {
+                mtx_unlock(&ioat->submit_lock);
+                return;
+        }
+
         order = ioat->ring_size_order;
         if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) {
                 mtx_unlock(&ioat->submit_lock);
@@ -1712,6 +1750,14 @@ ioat_reset_hw(struct ioat_softc *ioat)
         ioat_drain_locked(ioat);
         mtx_unlock(IOAT_REFLK);
 
+        /*
+         * Suspend ioat_process_events while the hardware and softc are in an
+         * indeterminate state.
+         */
+        mtx_lock(&ioat->cleanup_lock);
+        ioat->resetting_cleanup = TRUE;
+        mtx_unlock(&ioat->cleanup_lock);
+
         status = ioat_get_chansts(ioat);
         if (is_ioat_active(status) || is_ioat_idle(status))
                 ioat_suspend(ioat);
@@ -1793,6 +1839,7 @@ ioat_reset_hw(struct ioat_softc *ioat)
          */
         ioat->tail = ioat->head = ioat->hw_head = 0;
         ioat->last_seen = 0;
+        *ioat->comp_update = 0;
 
         ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
         ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
@@ -1800,17 +1847,28 @@ ioat_reset_hw(struct ioat_softc *ioat)
         error = 0;
 
 out:
-        mtx_lock(IOAT_REFLK);
-        ioat->resetting = FALSE;
-        wakeup(&ioat->resetting);
-
-        ioat->quiescing = FALSE;
-        wakeup(&ioat->quiescing);
-        mtx_unlock(IOAT_REFLK);
+        /*
+         * Resume completions now that ring state is consistent.
+         * ioat_start_channel will add a pending completion and if we are still
+         * blocking completions, we may livelock.
+         */
+        mtx_lock(&ioat->cleanup_lock);
+        ioat->resetting_cleanup = FALSE;
+        mtx_unlock(&ioat->cleanup_lock);
 
+        /* Enqueues a null operation and ensures it completes. */
         if (error == 0)
                 error = ioat_start_channel(ioat);
 
+        /* Unblock submission of new work */
+        mtx_lock(IOAT_REFLK);
+        ioat->quiescing = FALSE;
+        wakeup(&ioat->quiescing);
+
+        ioat->resetting = FALSE;
+        wakeup(&ioat->resetting);
+        mtx_unlock(IOAT_REFLK);
+
         return (error);
 }
 
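Taken together, the hunks above give ioat_reset_hw the following ordering.
This is a condensed outline rather than the verbatim driver code; the early
quiescing setup is assumed from context, since it falls outside the hunks
shown here:

        /* Condensed outline of the reset path after this change. */
        mtx_lock(IOAT_REFLK);
        ioat->quiescing = TRUE;         /* block new submitters (assumed; not in the shown hunks) */
        ioat_drain_locked(ioat);        /* wait for outstanding work to drain */
        mtx_unlock(IOAT_REFLK);

        mtx_lock(&ioat->cleanup_lock);
        ioat->resetting_cleanup = TRUE; /* fence out callout and interrupt cleanup */
        mtx_unlock(&ioat->cleanup_lock);

        /* ... unlocked hardware reset (can sleep ~40 ms), ring state reinitialized ... */

        mtx_lock(&ioat->cleanup_lock);
        ioat->resetting_cleanup = FALSE;        /* completions first, or ioat_start_channel livelocks */
        mtx_unlock(&ioat->cleanup_lock);

        if (error == 0)
                error = ioat_start_channel(ioat);       /* null op; needs completions running */

        mtx_lock(IOAT_REFLK);
        ioat->quiescing = FALSE;        /* finally unblock submission of new work */
        wakeup(&ioat->quiescing);
        ioat->resetting = FALSE;
        wakeup(&ioat->resetting);
        mtx_unlock(IOAT_REFLK);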
@@ -491,7 +491,8 @@ struct ioat_softc {
         boolean_t is_reset_pending;
         boolean_t is_channel_running;
         boolean_t intrdelay_supported;
-        boolean_t resetting;
+        boolean_t resetting; /* submit_lock */
+        boolean_t resetting_cleanup; /* cleanup_lock */
 
         uint32_t head;
         uint32_t tail;