hyperv/hn: Suspend and resume the backend properly upon MTU change.

Suspend:
- Prevent the backend from being touched on the TX path.
- Clear the RNDIS RX filter, and wait for RX to drain.
- Make sure that NVS sees the chimney sending buffer and RXBUF
  disconnection before unlinking these buffers from the channel.

Resume:
- Reconfigure the RNDIS filter.
- Allow the TX path to work on the backend again.
- Kick-start the txeof task, in case OACTIVE is set.

This fixes various panics when the interface has traffic and the MTU
is being changed.
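
For reference, here is a minimal sketch (not part of the commit) of how
the new suspend/resume calls bracket the MTU reconfiguration in
hn_ioctl's SIOCSIFMTU path.  hn_reattach_synth() is a hypothetical
stand-in for the existing teardown/re-attach code shown in the diff
below:

static void
hn_change_mtu_sketch(struct hn_softc *sc, struct ifnet *ifp, int mtu)
{
	bool was_running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;

	if (was_running)
		hn_suspend(sc);	/* quiesce TX, clear RX filter, drain RX */

	/*
	 * Remove and add back the synthetic parts so the new MTU takes
	 * effect; hn_reattach_synth() is hypothetical and stands in
	 * for the actual teardown/re-attach logic in hn_ioctl.
	 */
	hn_reattach_synth(sc, mtu);

	hn_init_locked(sc);
	if (was_running)
		hn_resume(sc);	/* reopen RX filter, unfreeze TX rings */
}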

MFC after:	1 week
Sponsored by:	Microsoft
Differential Revision:	https://reviews.freebsd.org/D8046
Sepherosa Ziehau 2016-09-28 04:34:21 +00:00
parent c8e8d13398
commit 65fe5a4627
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=306390
3 changed files with 173 additions and 10 deletions

@@ -348,6 +348,16 @@ hn_nvs_disconn_rxbuf(struct hn_softc *sc)
return (error);
}
sc->hn_flags &= ~HN_FLAG_RXBUF_CONNECTED;
/*
* Wait for the hypervisor to receive this NVS request.
*/
while (!vmbus_chan_tx_empty(sc->hn_prichan))
pause("waittx", 1);
/*
* Linger long enough for NVS to disconnect RXBUF.
*/
pause("lingtx", (200 * hz) / 1000);
}
if (sc->hn_rxbuf_gpadl != 0) {
@@ -389,6 +399,17 @@ hn_nvs_disconn_chim(struct hn_softc *sc)
return (error);
}
sc->hn_flags &= ~HN_FLAG_CHIM_CONNECTED;
/*
* Wait for the hypervisor to receive this NVS request.
*/
while (!vmbus_chan_tx_empty(sc->hn_prichan))
pause("waittx", 1);
/*
* Linger long enough for NVS to disconnect chimney
* sending buffer.
*/
pause("lingtx", (200 * hz) / 1000);
}
if (sc->hn_chim_gpadl != 0) {
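
Both disconnect paths above follow the same wait-then-linger idiom.
Purely as an illustration (this helper is hypothetical and not part of
the commit), the idiom could be factored out like so:

/*
 * Hypothetical helper (illustration only): wait until the hypervisor
 * has consumed the NVS disconnect request from the primary channel's
 * TX bufring, then linger ~200ms so NVS can finish the disconnection
 * on its side.
 */
static void
hn_nvs_wait_disconn(struct hn_softc *sc)
{
	while (!vmbus_chan_tx_empty(sc->hn_prichan))
		pause("waittx", 1);		/* 1 tick */
	pause("lingtx", (200 * hz) / 1000);	/* ~200 ms */
}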

@@ -178,6 +178,7 @@ struct hn_tx_ring {
bus_dma_tag_t hn_tx_data_dtag;
uint64_t hn_csum_assist;
int hn_suspended;
int hn_gpa_cnt;
struct vmbus_gpa hn_gpa[NETVSC_PACKET_MAXPAGE];

@@ -348,6 +348,10 @@ static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_suspend(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_nvs_handle_notify(struct hn_softc *sc,
const struct vmbus_chanpkt_hdr *pkt);
@@ -905,6 +909,23 @@ hn_txdesc_hold(struct hn_txdesc *txd)
atomic_add_int(&txd->refs, 1);
}
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
bool pending = false;
#ifndef HN_USE_TXDESC_BUFRING
mtx_lock_spin(&txr->hn_txlist_spin);
if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
pending = true;
mtx_unlock_spin(&txr->hn_txlist_spin);
#else
if (!buf_ring_full(txr->hn_txdesc_br))
pending = true;
#endif
return (pending);
}
static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
@@ -1241,6 +1262,9 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
if (__predict_false(txr->hn_suspended))
return 0;
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return 0;
@@ -1627,6 +1651,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
#endif
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_suspend(sc);
/* We must remove and add back the device to cause the new
* MTU to take effect. This includes tearing down, but not
* deleting the channel, then bringing it back up.
@@ -1651,7 +1678,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
hn_set_chim_size(sc, sc->hn_chim_szmax);
hn_init_locked(sc);
/* All done! Resume now. */
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_resume(sc);
HN_UNLOCK(sc);
break;
@@ -2984,6 +3013,9 @@ hn_xmit(struct hn_tx_ring *txr, int len)
KASSERT(hn_use_if_start == 0,
("hn_xmit is called, when if_start is enabled"));
if (__predict_false(txr->hn_suspended))
return 0;
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
return 0;
@@ -3069,21 +3101,25 @@ hn_transmit(struct ifnet *ifp, struct mbuf *m)
return 0;
}
static void
hn_tx_ring_qflush(struct hn_tx_ring *txr)
{
struct mbuf *m;
mtx_lock(&txr->hn_tx_lock);
while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
m_freem(m);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_xmit_qflush(struct ifnet *ifp)
{
struct hn_softc *sc = ifp->if_softc;
int i;
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
struct mbuf *m;
mtx_lock(&txr->hn_tx_lock);
while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
m_freem(m);
mtx_unlock(&txr->hn_tx_lock);
}
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
if_qflush(ifp);
}
@@ -3502,6 +3538,111 @@ hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
}
}
static void
hn_rx_drain(struct vmbus_channel *chan)
{
while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
pause("waitch", 1);
vmbus_chan_intr_drain(chan);
}
static void
hn_suspend(struct hn_softc *sc)
{
struct vmbus_channel **subch = NULL;
int i, nsubch;
HN_LOCK_ASSERT(sc);
/*
* Suspend TX.
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_suspended = 1;
mtx_unlock(&txr->hn_tx_lock);
/* No one is able to send more packets now. */
/* Wait for all pending sends to finish. */
while (hn_tx_ring_pending(txr))
pause("hnwtx", 1 /* 1 tick */);
}
/*
* Disable RX.
*/
hv_rf_on_close(sc);
/* Give RNDIS enough time to flush all pending data packets. */
pause("waitrx", (200 * hz) / 1000);
nsubch = sc->hn_rx_ring_inuse - 1;
if (nsubch > 0)
subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
/*
* Drain RX/TX bufrings and interrupts.
*/
if (subch != NULL) {
for (i = 0; i < nsubch; ++i)
hn_rx_drain(subch[i]);
}
hn_rx_drain(sc->hn_prichan);
if (subch != NULL)
vmbus_subchan_rel(subch, nsubch);
}
static void
hn_resume(struct hn_softc *sc)
{
struct hn_tx_ring *txr;
int i;
HN_LOCK_ASSERT(sc);
/*
* Re-enable RX.
*/
hv_rf_on_open(sc);
/*
* Make sure to clear suspend status on "all" TX rings,
* since hn_tx_ring_inuse can be changed after hn_suspend().
*/
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_suspended = 0;
mtx_unlock(&txr->hn_tx_lock);
}
if (!hn_use_if_start) {
/*
* Flush unused drbrs, since hn_tx_ring_inuse may be
* reduced.
*/
for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
}
/*
* Kick start TX.
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
/*
* Use txeof task, so that any pending oactive can be
* cleared properly.
*/
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{