hyperv/hn: Split TX ring data structure out of softc

This paves the way for the upcoming vRSS work and eases further code cleanup.

Reviewed by:	adrian
Approved by:	adrian (mentor)
MFC after:	1 week
Sponsored by:	Microsoft OSTC
Differential Revision:	https://reviews.freebsd.org/D5283
commit dbfb4eba54 (parent b8f2d59daf)
Sepherosa Ziehau, 2016-02-18 07:37:59 +00:00

2 changed files with 404 additions and 208 deletions
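
In outline: the TX-side state of hn_softc moves into a new per-ring struct hn_tx_ring, and the softc keeps just an array of rings plus the device-global chimney limit. A minimal compilable sketch of the resulting shape (the field subset is illustrative, not the full layout shown in the header diff below):

        /* Sketch only: a few representative fields, not FreeBSD KPI. */
        struct hn_tx_ring {
                struct hn_softc *hn_sc;         /* back-pointer to the device */
                int              hn_txdesc_cnt;
                int              hn_txdesc_avail;
                /* free descriptor list, DMA tags, per-ring stats ... */
        };

        struct hn_softc {
                int                hn_tx_ring_cnt;    /* 1 until vRSS lands */
                struct hn_tx_ring *hn_tx_ring;        /* malloc'ed ring array */
                int                hn_tx_chimney_max; /* stays device-global */
        };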

sys/dev/hyperv/netvsc/hv_net_vsc.h

@@ -1011,6 +1011,38 @@ struct hn_rx_ring {
#define HN_TRUST_HCSUM_TCP 0x0002
#define HN_TRUST_HCSUM_UDP 0x0004
struct hn_tx_ring {
struct mtx hn_txlist_spin;
struct hn_txdesc_list hn_txlist;
int hn_txdesc_cnt;
int hn_txdesc_avail;
int hn_txeof;
int hn_sched_tx;
struct taskqueue *hn_tx_taskq;
struct task hn_start_task;
struct task hn_txeof_task;
struct mtx hn_tx_lock;
struct hn_softc *hn_sc;
int hn_direct_tx_size;
int hn_tx_chimney_size;
bus_dma_tag_t hn_tx_data_dtag;
uint64_t hn_csum_assist;
u_long hn_no_txdescs;
u_long hn_send_failed;
u_long hn_txdma_failed;
u_long hn_tx_collapsed;
u_long hn_tx_chimney;
/* Rarely used stuffs */
struct hn_txdesc *hn_txdesc;
bus_dma_tag_t hn_tx_rndis_dtag;
struct sysctl_oid *hn_tx_sysctl_tree;
} __aligned(CACHE_LINE_SIZE);
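
The new struct hn_tx_ring is tagged __aligned(CACHE_LINE_SIZE): once several rings sit in one malloc'ed array, the padding keeps each ring's hot counters on its own cache line so rings do not false-share. A compilable illustration, assuming a 64-byte line (FreeBSD defines CACHE_LINE_SIZE per architecture):

        #define CACHE_LINE_SIZE 64              /* assumption for this sketch */

        struct ring {
                unsigned long hn_tx_chimney;    /* hot per-ring counter */
        } __attribute__((aligned(CACHE_LINE_SIZE)));

        /* Each array element starts on its own cache line, so one CPU
         * bumping ring[0]'s counter never invalidates ring[1]'s line. */
        _Static_assert(sizeof(struct ring) % CACHE_LINE_SIZE == 0,
            "ring padded to a whole cache line");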
/*
* Device-specific softc structure
*/
@@ -1028,33 +1060,14 @@ typedef struct hn_softc {
struct hv_device *hn_dev_obj;
netvsc_dev *net_dev;
struct hn_txdesc *hn_txdesc;
bus_dma_tag_t hn_tx_data_dtag;
bus_dma_tag_t hn_tx_rndis_dtag;
int hn_tx_chimney_size;
int hn_tx_chimney_max;
uint64_t hn_csum_assist;
struct mtx hn_txlist_spin;
struct hn_txdesc_list hn_txlist;
int hn_txdesc_cnt;
int hn_txdesc_avail;
int hn_txeof;
int hn_sched_tx;
int hn_direct_tx_size;
struct taskqueue *hn_tx_taskq;
struct task hn_start_task;
struct task hn_txeof_task;
int hn_rx_ring_cnt;
struct hn_rx_ring *hn_rx_ring;
u_long hn_no_txdescs;
u_long hn_send_failed;
u_long hn_txdma_failed;
u_long hn_tx_collapsed;
u_long hn_tx_chimney;
int hn_tx_ring_cnt;
struct hn_tx_ring *hn_tx_ring;
int hn_tx_chimney_max;
struct taskqueue *hn_tx_taskq;
struct sysctl_oid *hn_tx_sysctl_tree;
} hn_softc_t;
/*

sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c

@@ -153,7 +153,7 @@ __FBSDID("$FreeBSD$");
struct hn_txdesc {
SLIST_ENTRY(hn_txdesc) link;
struct mbuf *m;
struct hn_softc *sc;
struct hn_tx_ring *txr;
int refs;
uint32_t flags; /* HN_TXD_FLAG_ */
netvsc_packet netvsc_pkt; /* XXX to be removed */
@@ -193,7 +193,6 @@ struct hn_txdesc {
#define NV_LOCK_INIT(_sc, _name) \
mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock)
#define NV_TRYLOCK(_sc) mtx_trylock(&(_sc)->hn_lock)
#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED)
#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock)
#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock)
@@ -266,9 +265,9 @@ static void hn_stop(hn_softc_t *sc);
static void hn_ifinit_locked(hn_softc_t *sc);
static void hn_ifinit(void *xsc);
static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int hn_start_locked(struct ifnet *ifp, int len);
static int hn_start_locked(struct hn_tx_ring *txr, int len);
static void hn_start(struct ifnet *ifp);
static void hn_start_txeof(struct ifnet *ifp);
static void hn_start_txeof(struct hn_tx_ring *);
static int hn_ifmedia_upd(struct ifnet *ifp);
static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
@@ -277,14 +276,20 @@ static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_check_iplen(const struct mbuf *, int);
static int hn_create_tx_ring(struct hn_softc *sc);
static void hn_destroy_tx_ring(struct hn_softc *sc);
static int hn_create_tx_ring(struct hn_softc *, int);
static void hn_destroy_tx_ring(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_start_taskfunc(void *xsc, int pending);
static void hn_txeof_taskfunc(void *xsc, int pending);
static int hn_encap(struct hn_softc *, struct hn_txdesc *, struct mbuf **);
static void hn_stop_tx_tasks(struct hn_softc *);
static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
static void hn_create_rx_data(struct hn_softc *sc);
static void hn_destroy_rx_data(struct hn_softc *sc);
static void hn_set_tx_chimney_size(struct hn_softc *, int);
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
@@ -350,8 +355,6 @@ netvsc_attach(device_t dev)
hn_softc_t *sc;
int unit = device_get_unit(dev);
struct ifnet *ifp = NULL;
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
int error;
#if __FreeBSD_version >= 1100045
int tso_maxlen;
@@ -365,7 +368,6 @@ netvsc_attach(device_t dev)
bzero(sc, sizeof(hn_softc_t));
sc->hn_unit = unit;
sc->hn_dev = dev;
sc->hn_direct_tx_size = hn_direct_tx_size;
if (hn_tx_taskq == NULL) {
sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
@@ -375,13 +377,6 @@ netvsc_attach(device_t dev)
} else {
sc->hn_tx_taskq = hn_tx_taskq;
}
TASK_INIT(&sc->hn_start_task, 0, hn_start_taskfunc, sc);
TASK_INIT(&sc->hn_txeof_task, 0, hn_txeof_taskfunc, sc);
error = hn_create_tx_ring(sc);
if (error)
goto failed;
NV_LOCK_INIT(sc, "NetVSCLock");
sc->hn_dev_obj = device_ctx;
@@ -389,6 +384,10 @@ netvsc_attach(device_t dev)
ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
error = hn_create_tx_data(sc);
if (error)
goto failed;
hn_create_rx_data(sc);
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
@@ -421,12 +420,7 @@ netvsc_attach(device_t dev)
ifp->if_capenable |=
IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
IFCAP_LRO;
if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
sc->hn_csum_assist = HN_CSUM_ASSIST;
else
sc->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
ifp->if_hwassist = sc->hn_csum_assist | CSUM_TSO;
ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO;
error = hv_rf_on_device_add(device_ctx, &device_info);
if (error)
@@ -455,52 +449,14 @@ netvsc_attach(device_t dev)
#endif
sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
if (hn_tx_chimney_size > 0 &&
hn_tx_chimney_size < sc->hn_tx_chimney_max)
sc->hn_tx_chimney_size = hn_tx_chimney_size;
/*
* Always schedule transmission instead of trying
* to do direct transmission. This one gives the
* best performance so far.
*/
sc->hn_sched_tx = 1;
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
"Chimney send packet size upper boundary");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
"I", "Chimney send packet size limit");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "direct_tx_size",
CTLFLAG_RW, &sc->hn_direct_tx_size, 0,
"Size of the packet for direct transmission");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "sched_tx",
CTLFLAG_RW, &sc->hn_sched_tx, 0,
"Always schedule transmission "
"instead of doing direct transmission");
hn_set_tx_chimney_size(sc, hn_tx_chimney_size);
return (0);
failed:
hn_destroy_tx_ring(sc);
hn_destroy_tx_data(sc);
if (ifp != NULL)
if_free(ifp);
return (error);
@@ -531,14 +487,14 @@ netvsc_detach(device_t dev)
hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
taskqueue_drain(sc->hn_tx_taskq, &sc->hn_start_task);
taskqueue_drain(sc->hn_tx_taskq, &sc->hn_txeof_task);
if (sc->hn_tx_taskq != hn_tx_taskq)
taskqueue_free(sc->hn_tx_taskq);
hn_stop_tx_tasks(sc);
ifmedia_removeall(&sc->hn_media);
hn_destroy_rx_data(sc);
hn_destroy_tx_ring(sc);
hn_destroy_tx_data(sc);
if (sc->hn_tx_taskq != hn_tx_taskq)
taskqueue_free(sc->hn_tx_taskq);
return (0);
}
@@ -553,13 +509,13 @@ netvsc_shutdown(device_t dev)
}
static __inline int
hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m = *m_head;
int error;
error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
m, segs, nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
struct mbuf *m_new;
@@ -569,13 +525,13 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
return ENOBUFS;
else
*m_head = m = m_new;
sc->hn_tx_collapsed++;
txr->hn_tx_collapsed++;
error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
}
if (!error) {
bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
BUS_DMASYNC_PREWRITE);
txd->flags |= HN_TXD_FLAG_DMAMAP;
}
@@ -583,20 +539,20 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
}
static __inline void
hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
if (txd->flags & HN_TXD_FLAG_DMAMAP) {
bus_dmamap_sync(sc->hn_tx_data_dtag,
bus_dmamap_sync(txr->hn_tx_data_dtag,
txd->data_dmap, BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(sc->hn_tx_data_dtag,
bus_dmamap_unload(txr->hn_tx_data_dtag,
txd->data_dmap);
txd->flags &= ~HN_TXD_FLAG_DMAMAP;
}
}
static __inline int
hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
@@ -606,7 +562,7 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
if (atomic_fetchadd_int(&txd->refs, -1) != 1)
return 0;
hn_txdesc_dmamap_unload(sc, txd);
hn_txdesc_dmamap_unload(txr, txd);
if (txd->m != NULL) {
m_freem(txd->m);
txd->m = NULL;
@@ -614,31 +570,31 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
txd->flags |= HN_TXD_FLAG_ONLIST;
mtx_lock_spin(&sc->hn_txlist_spin);
KASSERT(sc->hn_txdesc_avail >= 0 &&
sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
sc->hn_txdesc_avail++;
SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
mtx_unlock_spin(&sc->hn_txlist_spin);
mtx_lock_spin(&txr->hn_txlist_spin);
KASSERT(txr->hn_txdesc_avail >= 0 &&
txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
txr->hn_txdesc_avail++;
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
mtx_unlock_spin(&txr->hn_txlist_spin);
return 1;
}
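
The put/get pair above is the core of the descriptor lifecycle: the send path and the completion callback each hold a reference, and only whoever drops the last one recycles the descriptor onto the ring's free list under hn_txlist_spin. A userland sketch of hn_txdesc_put()'s shape, using C11 atomics in place of FreeBSD's atomic_fetchadd_int():

        #include <stdatomic.h>
        #include <stdbool.h>

        struct txdesc {
                atomic_int refs;
        };

        /* Returns true only for the caller that drops the last reference;
         * that caller then unloads the DMA map, frees the mbuf, and
         * reinserts the descriptor into the free list. */
        static bool
        txdesc_put(struct txdesc *txd)
        {
                /* fetch-and-sub returns the old value: 1 means we were last */
                if (atomic_fetch_sub_explicit(&txd->refs, 1,
                    memory_order_acq_rel) != 1)
                        return false;
                return true;
        }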
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_softc *sc)
hn_txdesc_get(struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
mtx_lock_spin(&sc->hn_txlist_spin);
txd = SLIST_FIRST(&sc->hn_txlist);
mtx_lock_spin(&txr->hn_txlist_spin);
txd = SLIST_FIRST(&txr->hn_txlist);
if (txd != NULL) {
KASSERT(sc->hn_txdesc_avail > 0,
("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
sc->hn_txdesc_avail--;
SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
KASSERT(txr->hn_txdesc_avail > 0,
("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
txr->hn_txdesc_avail--;
SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
}
mtx_unlock_spin(&sc->hn_txlist_spin);
mtx_unlock_spin(&txr->hn_txlist_spin);
if (txd != NULL) {
KASSERT(txd->m == NULL && txd->refs == 0 &&
@@ -670,20 +626,21 @@ netvsc_xmit_completion(void *context)
{
netvsc_packet *packet = context;
struct hn_txdesc *txd;
struct hn_softc *sc;
struct hn_tx_ring *txr;
txd = (struct hn_txdesc *)(uintptr_t)
packet->compl.send.send_completion_tid;
sc = txd->sc;
sc->hn_txeof = 1;
hn_txdesc_put(sc, txd);
txr = txd->txr;
txr->hn_txeof = 1;
hn_txdesc_put(txr, txd);
}
void
netvsc_channel_rollup(struct hv_device *device_ctx)
{
struct hn_softc *sc = device_get_softc(device_ctx->device);
struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */
#if defined(INET) || defined(INET6)
struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
struct lro_ctrl *lro = &rxr->hn_lro;
@@ -695,11 +652,11 @@ netvsc_channel_rollup(struct hv_device *device_ctx)
}
#endif
if (!sc->hn_txeof)
if (!txr->hn_txeof)
return;
sc->hn_txeof = 0;
hn_start_txeof(sc->hn_ifp);
txr->hn_txeof = 0;
hn_start_txeof(txr);
}
/*
@@ -707,7 +664,7 @@ netvsc_channel_rollup(struct hv_device *device_ctx)
* If this function fails, then both txd and m_head0 will be freed.
*/
static int
hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
{
bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
int error, nsegs, i;
@@ -810,7 +767,7 @@ hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
#endif
tso_info->lso_v2_xmit.tcp_header_offset = 0;
tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;
} else if (m_head->m_pkthdr.csum_flags & sc->hn_csum_assist) {
} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
rndis_tcp_ip_csum_info *csum_info;
rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
@@ -837,8 +794,8 @@ hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
/*
* Chimney send, if the packet could fit into one chimney buffer.
*/
if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
netvsc_dev *net_dev = sc->net_dev;
if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) {
netvsc_dev *net_dev = txr->hn_sc->net_dev;
uint32_t send_buf_section_idx;
send_buf_section_idx =
@@ -857,12 +814,12 @@ hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
packet->send_buf_section_size =
packet->tot_data_buf_len;
packet->page_buf_count = 0;
sc->hn_tx_chimney++;
txr->hn_tx_chimney++;
goto done;
}
}
error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
if (error) {
int freed;
@@ -872,12 +829,12 @@ hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
m_freem(m_head);
*m_head0 = NULL;
freed = hn_txdesc_put(sc, txd);
freed = hn_txdesc_put(txr, txd);
KASSERT(freed != 0,
("fail to free txd upon txdma error"));
sc->hn_txdma_failed++;
if_inc_counter(sc->hn_ifp, IFCOUNTER_OERRORS, 1);
txr->hn_txdma_failed++;
if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
return error;
}
*m_head0 = m_head;
@@ -920,11 +877,15 @@ hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
* Start a transmit of one or more packets
*/
static int
hn_start_locked(struct ifnet *ifp, int len)
hn_start_locked(struct hn_tx_ring *txr, int len)
{
struct hn_softc *sc = ifp->if_softc;
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
return 0;
@@ -948,15 +909,15 @@ hn_start_locked(struct ifnet *ifp, int len)
return 1;
}
txd = hn_txdesc_get(sc);
txd = hn_txdesc_get(txr);
if (txd == NULL) {
sc->hn_no_txdescs++;
txr->hn_no_txdescs++;
IF_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
break;
}
error = hn_encap(sc, txd, &m_head);
error = hn_encap(txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed */
continue;
@@ -971,7 +932,7 @@ hn_start_locked(struct ifnet *ifp, int len)
ETHER_BPF_MTAP(ifp, m_head);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
}
hn_txdesc_put(sc, txd);
hn_txdesc_put(txr, txd);
if (__predict_false(error)) {
int freed;
@@ -983,9 +944,9 @@ hn_start_locked(struct ifnet *ifp, int len)
* commands to run? Ask netvsc_channel_rollup()
* to kick start later.
*/
sc->hn_txeof = 1;
txr->hn_txeof = 1;
if (!send_failed) {
sc->hn_send_failed++;
txr->hn_send_failed++;
send_failed = 1;
/*
* Try sending again after set hn_txeof;
@@ -1002,11 +963,11 @@ hn_start_locked(struct ifnet *ifp, int len)
* DMA map in hn_txdesc_put(), if it was loaded.
*/
txd->m = NULL;
freed = hn_txdesc_put(sc, txd);
freed = hn_txdesc_put(txr, txd);
KASSERT(freed != 0,
("fail to free txd upon send error"));
sc->hn_send_failed++;
txr->hn_send_failed++;
IF_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
break;
@@ -1384,8 +1345,10 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
if (sc->hn_tx_ring[0].hn_tx_chimney_size >
sc->hn_tx_chimney_max)
hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
hn_ifinit_locked(sc);
NV_LOCK(sc);
@@ -1450,10 +1413,13 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= sc->hn_csum_assist;
else
ifp->if_hwassist &= ~sc->hn_csum_assist;
if (ifp->if_capenable & IFCAP_TXCSUM) {
ifp->if_hwassist |=
sc->hn_tx_ring[0].hn_csum_assist;
} else {
ifp->if_hwassist &=
~sc->hn_tx_ring[0].hn_csum_assist;
}
}
if (mask & IFCAP_RXCSUM)
@@ -1536,48 +1502,54 @@ static void
hn_start(struct ifnet *ifp)
{
struct hn_softc *sc = ifp->if_softc;
struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
if (sc->hn_sched_tx)
if (txr->hn_sched_tx)
goto do_sched;
if (NV_TRYLOCK(sc)) {
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
NV_UNLOCK(sc);
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (!sched)
return;
}
do_sched:
taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_start_task);
}
static void
hn_start_txeof(struct ifnet *ifp)
hn_start_txeof(struct hn_tx_ring *txr)
{
struct hn_softc *sc = ifp->if_softc;
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
if (sc->hn_sched_tx)
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
if (txr->hn_sched_tx)
goto do_sched;
if (NV_TRYLOCK(sc)) {
if (mtx_trylock(&txr->hn_tx_lock)) {
int sched;
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
NV_UNLOCK(sc);
if (sched)
taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
if (sched) {
taskqueue_enqueue(txr->hn_tx_taskq,
&txr->hn_start_task);
}
} else {
do_sched:
/*
* Release the OACTIVE earlier, with the hope, that
* others could catch up. The task will clear the
* flag again with the NV_LOCK to avoid possible
* flag again with the hn_tx_lock to avoid possible
* races.
*/
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_txeof_task);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
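
hn_start() and hn_start_txeof() now share a per-ring flavor of the old trylock-or-defer scheme: try direct transmission under the ring lock, and if the lock is contended, or hn_sched_tx forces it (the default, per the comment in hn_create_tx_ring() below), hand the work to the ring's taskqueue instead of blocking. A pthread-based sketch of the pattern, with stubs standing in for mtx_trylock(), hn_start_locked(), and taskqueue_enqueue():

        #include <pthread.h>

        struct ring {
                pthread_mutex_t lock;           /* stand-in for hn_tx_lock */
                int             sched_tx;       /* always-defer knob */
        };

        static int  start_locked(struct ring *r) { (void)r; return 0; }
        static void enqueue_start_task(struct ring *r) { (void)r; }

        static void
        start(struct ring *r)
        {
                if (!r->sched_tx && pthread_mutex_trylock(&r->lock) == 0) {
                        int sched = start_locked(r);
                        pthread_mutex_unlock(&r->lock);
                        if (!sched)
                                return;         /* sent directly, done */
                }
                enqueue_start_task(r);          /* contended or forced: defer */
        }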
@@ -1736,7 +1708,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int chimney_size, error;
chimney_size = sc->hn_tx_chimney_size;
chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size;
error = sysctl_handle_int(oidp, &chimney_size, 0, req);
if (error || req->newptr == NULL)
return error;
@@ -1744,8 +1716,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
return EINVAL;
if (sc->hn_tx_chimney_size != chimney_size)
sc->hn_tx_chimney_size = chimney_size;
hn_set_tx_chimney_size(sc, chimney_size);
return 0;
}
@@ -1801,6 +1772,56 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
return 0;
}
static int
hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_tx_ring *txr;
u_long stat;
stat = 0;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
stat += *((u_long *)((uint8_t *)txr + ofs));
}
error = sysctl_handle_long(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
return error;
/* Zero out this stat. */
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((u_long *)((uint8_t *)txr + ofs)) = 0;
}
return 0;
}
static int
hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
int ofs = arg2, i, error, conf;
struct hn_tx_ring *txr;
txr = &sc->hn_tx_ring[0];
conf = *((int *)((uint8_t *)txr + ofs));
error = sysctl_handle_int(oidp, &conf, 0, req);
if (error || req->newptr == NULL)
return error;
NV_LOCK(sc);
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((int *)((uint8_t *)txr + ofs)) = conf;
}
NV_UNLOCK(sc);
return 0;
}
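
Both new handlers above lean on one trick: the sysctl's arg2 carries the byte offset of a field inside struct hn_tx_ring (via __offsetof in hn_create_tx_data() below), so a single handler can sum a counter across rings, or fan a config value out to all of them. A self-contained demo of the offset arithmetic, with a hypothetical two-field ring:

        #include <stddef.h>
        #include <stdio.h>

        struct tx_ring {        /* hypothetical subset of the real struct */
                unsigned long hn_no_txdescs;
                unsigned long hn_send_failed;
        };

        static unsigned long
        sum_stat(const struct tx_ring *rings, int cnt, size_t ofs)
        {
                unsigned long stat = 0;
                for (int i = 0; i < cnt; ++i)
                        stat += *(const unsigned long *)
                            ((const char *)&rings[i] + ofs);
                return stat;
        }

        int
        main(void)
        {
                struct tx_ring r[2] = { { 1, 2 }, { 3, 4 } };
                /* one handler, any field: the offset plays the role of arg2 */
                printf("%lu\n",
                    sum_stat(r, 2, offsetof(struct tx_ring, hn_send_failed)));
                return 0;       /* prints 6 */
        }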
static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
@@ -2019,16 +2040,37 @@ hn_destroy_rx_data(struct hn_softc *sc)
}
static int
hn_create_tx_ring(struct hn_softc *sc)
hn_create_tx_ring(struct hn_softc *sc, int id)
{
struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
bus_dma_tag_t parent_dtag;
int error, i;
sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
txr->hn_sc = sc;
mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
M_NETVSC, M_WAITOK | M_ZERO);
SLIST_INIT(&sc->hn_txlist);
mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
SLIST_INIT(&txr->hn_txlist);
txr->hn_tx_taskq = sc->hn_tx_taskq;
TASK_INIT(&txr->hn_start_task, 0, hn_start_taskfunc, txr);
TASK_INIT(&txr->hn_txeof_task, 0, hn_txeof_taskfunc, txr);
txr->hn_direct_tx_size = hn_direct_tx_size;
if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
txr->hn_csum_assist = HN_CSUM_ASSIST;
else
txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
/*
* Always schedule transmission instead of trying to do direct
* transmission. This one gives the best performance so far.
*/
txr->hn_sched_tx = 1;
parent_dtag = bus_get_dma_tag(sc->hn_dev);
@@ -2045,7 +2087,7 @@ hn_create_tx_ring(struct hn_softc *sc)
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&sc->hn_tx_rndis_dtag);
&txr->hn_tx_rndis_dtag);
if (error) {
device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
return error;
@@ -2064,21 +2106,21 @@ hn_create_tx_ring(struct hn_softc *sc)
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&sc->hn_tx_data_dtag);
&txr->hn_tx_data_dtag);
if (error) {
device_printf(sc->hn_dev, "failed to create data dmatag\n");
return error;
}
for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
struct hn_txdesc *txd = &sc->hn_txdesc[i];
for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
struct hn_txdesc *txd = &txr->hn_txdesc[i];
txd->sc = sc;
txd->txr = txr;
/*
* Allocate and load RNDIS messages.
*/
error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
(void **)&txd->rndis_msg,
BUS_DMA_WAITOK | BUS_DMA_COHERENT,
&txd->rndis_msg_dmap);
@@ -2088,7 +2130,7 @@ hn_create_tx_ring(struct hn_softc *sc)
return error;
}
error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
txd->rndis_msg_dmap,
txd->rndis_msg, HN_RNDIS_MSG_LEN,
hn_dma_map_paddr, &txd->rndis_msg_paddr,
@@ -2096,80 +2138,221 @@ hn_create_tx_ring(struct hn_softc *sc)
if (error) {
device_printf(sc->hn_dev,
"failed to load rndis_msg, %d\n", i);
bus_dmamem_free(sc->hn_tx_rndis_dtag,
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_msg, txd->rndis_msg_dmap);
return error;
}
/* DMA map for TX data. */
error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
&txd->data_dmap);
if (error) {
device_printf(sc->hn_dev,
"failed to allocate tx data dmamap\n");
bus_dmamap_unload(sc->hn_tx_rndis_dtag,
bus_dmamap_unload(txr->hn_tx_rndis_dtag,
txd->rndis_msg_dmap);
bus_dmamem_free(sc->hn_tx_rndis_dtag,
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_msg, txd->rndis_msg_dmap);
return error;
}
/* All set, put it to list */
txd->flags |= HN_TXD_FLAG_ONLIST;
SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
}
txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
if (sc->hn_tx_sysctl_tree != NULL) {
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
char name[16];
/*
* Create per TX ring sysctl tree:
* dev.hn.UNIT.tx.RINGID
*/
ctx = device_get_sysctl_ctx(sc->hn_dev);
child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
snprintf(name, sizeof(name), "%d", id);
txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
name, CTLFLAG_RD, 0, "");
if (txr->hn_tx_sysctl_tree != NULL) {
child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
"# of available TX descs");
}
}
sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
return 0;
}
static void
hn_destroy_tx_ring(struct hn_softc *sc)
hn_destroy_tx_ring(struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
if (txr->hn_txdesc == NULL)
return;
while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
KASSERT(txd->m == NULL, ("still has mbuf installed"));
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
("still dma mapped"));
SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
bus_dmamap_unload(sc->hn_tx_rndis_dtag,
bus_dmamap_unload(txr->hn_tx_rndis_dtag,
txd->rndis_msg_dmap);
bus_dmamem_free(sc->hn_tx_rndis_dtag,
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_msg, txd->rndis_msg_dmap);
bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
}
if (sc->hn_tx_data_dtag != NULL)
bus_dma_tag_destroy(sc->hn_tx_data_dtag);
if (sc->hn_tx_rndis_dtag != NULL)
bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
free(sc->hn_txdesc, M_NETVSC);
mtx_destroy(&sc->hn_txlist_spin);
if (txr->hn_tx_data_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_data_dtag);
if (txr->hn_tx_rndis_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
free(txr->hn_txdesc, M_NETVSC);
txr->hn_txdesc = NULL;
mtx_destroy(&txr->hn_txlist_spin);
mtx_destroy(&txr->hn_tx_lock);
}
static int
hn_create_tx_data(struct hn_softc *sc)
{
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
int i;
sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */
sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
M_NETVSC, M_WAITOK | M_ZERO);
ctx = device_get_sysctl_ctx(sc->hn_dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
/* Create dev.hn.UNIT.tx sysctl tree */
sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
CTLFLAG_RD, 0, "");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
int error;
error = hn_create_tx_ring(sc, i);
if (error)
return error;
}
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
CTLTYPE_ULONG | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_no_txdescs),
hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
CTLTYPE_ULONG | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_send_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
CTLTYPE_ULONG | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
CTLTYPE_ULONG | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
CTLTYPE_ULONG | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
"# of total TX descs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
"Chimney send packet size upper boundary");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
"I", "Chimney send packet size limit");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
CTLTYPE_INT | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_direct_tx_size),
hn_tx_conf_int_sysctl, "I",
"Size of the packet for direct transmission");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
CTLTYPE_INT | CTLFLAG_RW, sc,
__offsetof(struct hn_tx_ring, hn_sched_tx),
hn_tx_conf_int_sysctl, "I",
"Always schedule transmission "
"instead of doing direct transmission");
return 0;
}
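
With hn_create_tx_data() in place, the sysctl layout splits in two: aggregate nodes stay directly under the device and are served by the offset-based handlers above, while hn_create_tx_ring() hangs read-only per-ring nodes off the new "tx" subtree. Assuming unit 0 and ring 0, the resulting paths would look like:

        dev.hn.0.no_txdescs          (summed over all TX rings; a write zeroes them)
        dev.hn.0.tx_chimney_size     (fans out to every ring via hn_set_tx_chimney_size())
        dev.hn.0.tx.0.txdesc_avail   (per-ring, read-only)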
static void
hn_start_taskfunc(void *xsc, int pending __unused)
hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
{
struct hn_softc *sc = xsc;
int i;
NV_LOCK(sc);
hn_start_locked(sc->hn_ifp, 0);
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size;
NV_UNLOCK(sc);
}
static void
hn_txeof_taskfunc(void *xsc, int pending __unused)
hn_destroy_tx_data(struct hn_softc *sc)
{
struct hn_softc *sc = xsc;
struct ifnet *ifp = sc->hn_ifp;
int i;
NV_LOCK(sc);
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
hn_start_locked(ifp, 0);
NV_UNLOCK(sc);
if (sc->hn_tx_ring_cnt == 0)
return;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
hn_destroy_tx_ring(&sc->hn_tx_ring[i]);
free(sc->hn_tx_ring, M_NETVSC);
sc->hn_tx_ring = NULL;
sc->hn_tx_ring_cnt = 0;
}
static void
hn_start_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_txeof_taskfunc(void *xtxr, int pending __unused)
{
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_stop_tx_tasks(struct hn_softc *sc)
{
int i;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_start_task);
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
}
static void